1<html> 2<head> 3<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> 4<title>Univariate Statistics</title> 5<link rel="stylesheet" href="../math.css" type="text/css"> 6<meta name="generator" content="DocBook XSL Stylesheets V1.79.1"> 7<link rel="home" href="../index.html" title="Math Toolkit 2.12.0"> 8<link rel="up" href="../statistics.html" title="Chapter 6. Statistics"> 9<link rel="prev" href="../statistics.html" title="Chapter 6. Statistics"> 10<link rel="next" href="bivariate_statistics.html" title="Bivariate Statistics"> 11</head> 12<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"> 13<table cellpadding="2" width="100%"><tr> 14<td valign="top"><img alt="Boost C++ Libraries" width="277" height="86" src="../../../../../boost.png"></td> 15<td align="center"><a href="../../../../../index.html">Home</a></td> 16<td align="center"><a href="../../../../../libs/libraries.htm">Libraries</a></td> 17<td align="center"><a href="http://www.boost.org/users/people.html">People</a></td> 18<td align="center"><a href="http://www.boost.org/users/faq.html">FAQ</a></td> 19<td align="center"><a href="../../../../../more/index.htm">More</a></td> 20</tr></table> 21<hr> 22<div class="spirit-nav"> 23<a accesskey="p" href="../statistics.html"><img src="../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../statistics.html"><img src="../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../index.html"><img src="../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="bivariate_statistics.html"><img src="../../../../../doc/src/images/next.png" alt="Next"></a> 24</div> 25<div class="section"> 26<div class="titlepage"><div><div><h2 class="title" style="clear: both"> 27<a name="math_toolkit.univariate_statistics"></a><a class="link" href="univariate_statistics.html" title="Univariate Statistics">Univariate Statistics</a> 28</h2></div></div></div> 29<h4> 30<a name="math_toolkit.univariate_statistics.h0"></a> 31 <span class="phrase"><a name="math_toolkit.univariate_statistics.synopsis"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.synopsis">Synopsis</a> 32 </h4> 33<pre class="programlisting"><span class="preprocessor">#include</span> <span class="special"><</span><span class="identifier">boost</span><span class="special">/</span><span class="identifier">math</span><span class="special">/</span><span class="identifier">statistics</span><span class="special">/</span><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">></span> 34 35<span class="keyword">namespace</span> <span class="identifier">boost</span><span class="special">{</span> <span class="keyword">namespace</span> <span class="identifier">math</span><span class="special">{</span> <span class="keyword">namespace</span> <span class="identifier">statistics</span> <span class="special">{</span> 36 37 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span> 38 <span class="keyword">auto</span> <span class="identifier">mean</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span> 39 40 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span> 41 <span class="keyword">auto</span> <span class="identifier">mean</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span> 42 43 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span> 44 <span class="keyword">auto</span> <span class="identifier">variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span> 45 46 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span> 47 <span class="keyword">auto</span> <span class="identifier">variance</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span> 48 49 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span> 50 <span class="keyword">auto</span> <span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span> 51 52 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span> 53 <span class="keyword">auto</span> <span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span> 54 55 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span> 56 <span class="keyword">auto</span> <span class="identifier">mean_and_sample_variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span> 57 58 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span> 59 <span class="keyword">auto</span> <span class="identifier">skewness</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span> 60 61 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span> 62 <span class="keyword">auto</span> <span class="identifier">skewness</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span> 63 64 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span> 65 <span class="keyword">auto</span> <span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span> 66 67 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span> 68 <span class="keyword">auto</span> <span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span> 69 70 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span> 71 <span class="keyword">auto</span> <span class="identifier">excess_kurtosis</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span> 72 73 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span> 74 <span class="keyword">auto</span> <span class="identifier">excess_kurtosis</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span> 75 76 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span> 77 <span class="keyword">auto</span> <span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span> 78 79 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span> 80 <span class="keyword">auto</span> <span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span> 81 82 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span> 83 <span class="keyword">auto</span> <span class="identifier">median</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span> 84 85 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span> 86 <span class="keyword">auto</span> <span class="identifier">median</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span> 87 88 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">RandomAccessIterator</span><span class="special">></span> 89 <span class="keyword">auto</span> <span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">,</span> <span class="keyword">typename</span> <span class="identifier">std</span><span class="special">::</span><span class="identifier">iterator_traits</span><span class="special"><</span><span class="identifier">RandomAccessIterator</span><span class="special">>::</span><span class="identifier">value_type</span> <span class="identifier">center</span><span class="special">=</span><span class="identifier">std</span><span class="special">::</span><span class="identifier">numeric_limits</span><span class="special"><</span><span class="identifier">Real</span><span class="special">>::</span><span class="identifier">quiet_NaN</span><span class="special">());</span> 90 91 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">RandomAccessContainer</span><span class="special">></span> 92 <span class="keyword">auto</span> <span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">RandomAccessContainer</span> <span class="identifier">v</span><span class="special">,</span> <span class="keyword">typename</span> <span class="identifier">RandomAccessContainer</span><span class="special">::</span><span class="identifier">value_type</span> <span class="identifier">center</span><span class="special">=</span><span class="identifier">std</span><span class="special">::</span><span class="identifier">numeric_limits</span><span class="special"><</span><span class="identifier">Real</span><span class="special">>::</span><span class="identifier">quiet_NaN</span><span class="special">());</span> 93 94 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">RandomAccessIterator</span><span class="special">></span> 95 <span class="keyword">auto</span> <span class="identifier">interquartile_range</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span> 96 97 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">RandomAccessContainer</span><span class="special">></span> 98 <span class="keyword">auto</span> <span class="identifier">interquartile_range</span><span class="special">(</span><span class="identifier">RandomAccessContainer</span> <span class="identifier">v</span><span class="special">);</span> 99 100 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span> 101 <span class="keyword">auto</span> <span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span> 102 103 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span> 104 <span class="keyword">auto</span> <span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span> 105 106 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span> 107 <span class="keyword">auto</span> <span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span> 108 109 <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span> 110 <span class="keyword">auto</span> <span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span> 111 112<span class="special">}}}</span> 113</pre> 114<h4> 115<a name="math_toolkit.univariate_statistics.h1"></a> 116 <span class="phrase"><a name="math_toolkit.univariate_statistics.description"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.description">Description</a> 117 </h4> 118<p> 119 The file <code class="computeroutput"><span class="identifier">boost</span><span class="special">/</span><span class="identifier">math</span><span class="special">/</span><span class="identifier">statistics</span><span class="special">/</span><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span></code> is a 120 set of facilities for computing scalar values from vectors. 121 </p> 122<p> 123 Many of these functionals have trivial naive implementations, but experienced 124 programmers will recognize that even trivial algorithms are easy to screw up, 125 and that numerical instabilities often lurk in corner cases. We have attempted 126 to do our "due diligence" to root out these problems-scouring the 127 literature for numerically stable algorithms for even the simplest of functionals. 128 </p> 129<p> 130 <span class="emphasis"><em>Nota bene</em></span>: Some similar functionality is provided in 131 <a href="https://www.boost.org/doc/libs/1_68_0/doc/html/accumulators/user_s_guide.html" target="_top">Boost 132 Accumulators Framework</a>. These accumulators should be used in real-time 133 applications; <code class="computeroutput"><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span></code> should 134 be used when CPU vectorization is needed. As a reminder, remember that to actually 135 <span class="emphasis"><em>get</em></span> vectorization, compile with <code class="computeroutput"><span class="special">-</span><span class="identifier">march</span><span class="special">=</span><span class="identifier">native</span> 136 <span class="special">-</span><span class="identifier">O3</span></code> 137 flags. 138 </p> 139<p> 140 We now describe each functional in detail. Our examples use <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span></code> 141 to hold the data, but this not required. In general, you can store your data 142 in an Eigen array, and Armadillo vector, <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">array</span></code>, 143 and for many of the routines, a <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">forward_list</span></code>. 144 These routines are usable in float, double, long double, and Boost.Multiprecision 145 precision, as well as their complex extensions whenever the computation is 146 well-defined. For certain operations (total variation, for example) integer 147 inputs are supported. 148 </p> 149<h4> 150<a name="math_toolkit.univariate_statistics.h2"></a> 151 <span class="phrase"><a name="math_toolkit.univariate_statistics.mean"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.mean">Mean</a> 152 </h4> 153<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span> 154<span class="keyword">double</span> <span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">cbegin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">cend</span><span class="special">());</span> 155<span class="comment">// Alternative syntax if you want to use entire container:</span> 156<span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span> 157</pre> 158<p> 159 The implementation follows <a href="https://doi.org/10.1137/1.9780898718027" target="_top">Higham 160 1.6a</a>. The data is not modified and must be forward iterable. Works 161 with real and integer data. If the input is an integer type, the output is 162 a double precision float. 163 </p> 164<h4> 165<a name="math_toolkit.univariate_statistics.h3"></a> 166 <span class="phrase"><a name="math_toolkit.univariate_statistics.variance"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.variance">Variance</a> 167 </h4> 168<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span> 169<span class="identifier">Real</span> <span class="identifier">sigma_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">cbegin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">cend</span><span class="special">());</span> 170</pre> 171<p> 172 If you don't need to calculate on a subset of the input, then the range call 173 is more terse: 174 </p> 175<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span> 176<span class="identifier">Real</span> <span class="identifier">sigma_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span> 177</pre> 178<p> 179 The implementation follows <a href="https://doi.org/10.1137/1.9780898718027" target="_top">Higham 180 1.6b</a>. The input data must be forward iterable and the range <code class="computeroutput"><span class="special">[</span><span class="identifier">first</span><span class="special">,</span> 181 <span class="identifier">last</span><span class="special">)</span></code> 182 must contain at least two elements. It is <span class="emphasis"><em>not</em></span> in general 183 sensible to pass complex numbers to this routine. If integers are passed as 184 input, then the output is a double precision float. 185 </p> 186<p> 187 <code class="computeroutput"><span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span></code> 188 returns the population variance. If you want a sample variance, use 189 </p> 190<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span> 191<span class="identifier">Real</span> <span class="identifier">sn_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span> 192</pre> 193<h4> 194<a name="math_toolkit.univariate_statistics.h4"></a> 195 <span class="phrase"><a name="math_toolkit.univariate_statistics.skewness"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.skewness">Skewness</a> 196 </h4> 197<p> 198 Computes the skewness of a dataset: 199 </p> 200<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span> 201<span class="keyword">double</span> <span class="identifier">skewness</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">skewness</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span> 202<span class="comment">// skewness = 0.</span> 203</pre> 204<p> 205 The input vector is not modified, works with integral and real data. If the 206 input data is integral, the output is a double precision float. 207 </p> 208<p> 209 For a dataset consisting of a single constant value, we take the skewness to 210 be zero by definition. 211 </p> 212<p> 213 The implementation follows <a href="https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf" target="_top">Pebay</a>. 214 </p> 215<h4> 216<a name="math_toolkit.univariate_statistics.h5"></a> 217 <span class="phrase"><a name="math_toolkit.univariate_statistics.kurtosis"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.kurtosis">Kurtosis</a> 218 </h4> 219<p> 220 Computes the kurtosis of a dataset: 221 </p> 222<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span> 223<span class="keyword">double</span> <span class="identifier">kurtosis</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span> 224<span class="comment">// kurtosis = 17/10</span> 225</pre> 226<p> 227 The implementation follows <a href="https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf" target="_top">Pebay</a>. 228 The input data must be forward iterable and must consist of real or integral 229 values. If the input data is integral, the output is a double precision float. 230 Note that this is <span class="emphasis"><em>not</em></span> the excess kurtosis. If you require 231 the excess kurtosis, use <code class="computeroutput"><span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">excess_kurtosis</span></code>. This function simply subtracts 232 3 from the kurtosis, but it makes eminently clear our definition of kurtosis. 233 </p> 234<h4> 235<a name="math_toolkit.univariate_statistics.h6"></a> 236 <span class="phrase"><a name="math_toolkit.univariate_statistics.first_four_moments"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.first_four_moments">First 237 four moments</a> 238 </h4> 239<p> 240 Simultaneously computes the first four <a href="https://en.wikipedia.org/wiki/Central_moment" target="_top">central 241 moments</a> in a single pass through the data: 242 </p> 243<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span> 244<span class="keyword">auto</span> <span class="special">[</span><span class="identifier">M1</span><span class="special">,</span> <span class="identifier">M2</span><span class="special">,</span> <span class="identifier">M3</span><span class="special">,</span> <span class="identifier">M4</span><span class="special">]</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span> 245</pre> 246<h4> 247<a name="math_toolkit.univariate_statistics.h7"></a> 248 <span class="phrase"><a name="math_toolkit.univariate_statistics.median"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.median">Median</a> 249 </h4> 250<p> 251 Computes the median of a dataset: 252 </p> 253<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span> 254<span class="keyword">double</span> <span class="identifier">m</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">begin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">end</span><span class="special">());</span> 255</pre> 256<p> 257 <span class="emphasis"><em>Nota bene: The input vector is modified.</em></span> The calculation 258 of the median is a thin wrapper around the C++11 <a href="https://en.cppreference.com/w/cpp/algorithm/nth_element" target="_top"><code class="computeroutput"><span class="identifier">nth_element</span></code></a>. Therefore, all requirements 259 of <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">nth_element</span></code> are inherited by the median calculation. 260 In particular, the container must allow random access. 261 </p> 262<h4> 263<a name="math_toolkit.univariate_statistics.h8"></a> 264 <span class="phrase"><a name="math_toolkit.univariate_statistics.median_absolute_deviation"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.median_absolute_deviation">Median 265 Absolute Deviation</a> 266 </h4> 267<p> 268 Computes the <a href="https://en.wikipedia.org/wiki/Median_absolute_deviation" target="_top">median 269 absolute deviation</a> of a dataset: 270 </p> 271<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span> 272<span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span> 273</pre> 274<p> 275 By default, the deviation from the median is used. If you have some prior that 276 the median is zero, or wish to compute the median absolute deviation from the 277 mean, use the following: 278 </p> 279<pre class="programlisting"><span class="comment">// prior is that center is zero:</span> 280<span class="keyword">double</span> <span class="identifier">center</span> <span class="special">=</span> <span class="number">0</span><span class="special">;</span> 281<span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">,</span> <span class="identifier">center</span><span class="special">);</span> 282 283<span class="comment">// compute median absolute deviation from the mean:</span> 284<span class="keyword">double</span> <span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span> 285<span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">,</span> <span class="identifier">mu</span><span class="special">);</span> 286</pre> 287<p> 288 <span class="emphasis"><em>Nota bene:</em></span> The input vector is modified. Again the vector 289 is passed into a call to <a href="https://en.cppreference.com/w/cpp/algorithm/nth_element" target="_top"><code class="computeroutput"><span class="identifier">nth_element</span></code></a>. 290 </p> 291<h4> 292<a name="math_toolkit.univariate_statistics.h9"></a> 293 <span class="phrase"><a name="math_toolkit.univariate_statistics.interquartile_range"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.interquartile_range">Interquartile 294 Range</a> 295 </h4> 296<p> 297 Computes the <a href="https://en.wikipedia.org/wiki/Interquartile_range" target="_top">interquartile 298 range</a> of a dataset: 299 </p> 300<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span> 301<span class="keyword">double</span> <span class="identifier">iqr</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">interquartile_range</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span> 302<span class="comment">// Q1 = 1.5, Q3 = 4.5 => iqr = 3</span> 303</pre> 304<p> 305 For a vector of length <span class="emphasis"><em>2n+1</em></span> or <span class="emphasis"><em>2n</em></span>, 306 the first quartile <span class="emphasis"><em>Q</em></span><sub>1</sub> is the median of the <span class="emphasis"><em>n</em></span> 307 smallest values, and the third quartile <span class="emphasis"><em>Q</em></span><sub>3</sub> is the median 308 of the <span class="emphasis"><em>n</em></span> largest values. The interquartile range is then 309 <span class="emphasis"><em>Q</em></span><sub>3</sub> - <span class="emphasis"><em>Q</em></span><sub>1</sub>. The function <code class="computeroutput"><span class="identifier">interquartile_range</span></code>, like the <code class="computeroutput"><span class="identifier">median</span></code>, calls into <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">nth_element</span></code>, 310 and hence partially sorts the data. 311 </p> 312<h4> 313<a name="math_toolkit.univariate_statistics.h10"></a> 314 <span class="phrase"><a name="math_toolkit.univariate_statistics.gini_coefficient"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.gini_coefficient">Gini 315 Coefficient</a> 316 </h4> 317<p> 318 Compute the Gini coefficient of a dataset: 319 </p> 320<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">0</span><span class="special">,</span><span class="number">0</span><span class="special">,</span><span class="number">0</span><span class="special">};</span> 321<span class="keyword">double</span> <span class="identifier">gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span> 322<span class="comment">// gini = 3/4</span> 323<span class="keyword">double</span> <span class="identifier">s_gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span> 324<span class="comment">// s_gini = 1.</span> 325<span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">w</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">};</span> 326<span class="identifier">gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">w</span><span class="special">.</span><span class="identifier">begin</span><span class="special">(),</span> <span class="identifier">w</span><span class="special">.</span><span class="identifier">end</span><span class="special">());</span> 327<span class="comment">// gini = 0, as all elements are now equal.</span> 328</pre> 329<p> 330 <span class="emphasis"><em>Nota bene</em></span>: The input data is altered: in particular, it 331 is sorted. Makes a call to <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">sort</span></code>, and 332 as such requires random access iterators. 333 </p> 334<p> 335 The sample Gini coefficient lies in the range [0,1], whereas the population 336 Gini coefficient is in the range [0, 1 - 1/ <span class="emphasis"><em>n</em></span>]. 337 </p> 338<p> 339 <span class="emphasis"><em>Nota bene:</em></span> There is essentially no reason to pass negative 340 values to the Gini coefficient function. However, a use case (measuring wealth 341 inequality when some people have negative wealth) exists, so we do not throw 342 an exception when negative values are encountered. You should have <span class="emphasis"><em>very</em></span> 343 good cause to pass negative values to the Gini coefficient calculator. Another 344 use case is found in signal processing, but the sorting is by magnitude and 345 hence has a different implementation. See <code class="computeroutput"><span class="identifier">absolute_gini_coefficient</span></code> 346 for details. 347 </p> 348<h4> 349<a name="math_toolkit.univariate_statistics.h11"></a> 350 <span class="phrase"><a name="math_toolkit.univariate_statistics.references"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.references">References</a> 351 </h4> 352<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; "> 353<li class="listitem"> 354 Higham, Nicholas J. <span class="emphasis"><em>Accuracy and stability of numerical algorithms.</em></span> 355 Vol. 80. Siam, 2002. 356 </li> 357<li class="listitem"> 358 Philippe P. Pébay: <span class="quote">“<span class="quote">Formulas for Robust, One-Pass Parallel Computation 359 of Covariances and Arbitrary-Order Statistical Moments.</span>”</span> Technical 360 Report SAND2008-6212, Sandia National Laboratories, September 2008. 361 </li> 362</ul></div> 363</div> 364<table xmlns:rev="http://www.cs.rpi.edu/~gregod/boost/tools/doc/revision" width="100%"><tr> 365<td align="left"></td> 366<td align="right"><div class="copyright-footer">Copyright © 2006-2019 Nikhar 367 Agrawal, Anton Bikineev, Paul A. Bristow, Marco Guazzone, Christopher Kormanyos, 368 Hubert Holin, Bruno Lalande, John Maddock, Jeremy Murphy, Matthew Pulver, Johan 369 Råde, Gautam Sewani, Benjamin Sobotta, Nicholas Thompson, Thijs van den Berg, 370 Daryle Walker and Xiaogang Zhang<p> 371 Distributed under the Boost Software License, Version 1.0. (See accompanying 372 file LICENSE_1_0.txt or copy at <a href="http://www.boost.org/LICENSE_1_0.txt" target="_top">http://www.boost.org/LICENSE_1_0.txt</a>) 373 </p> 374</div></td> 375</tr></table> 376<hr> 377<div class="spirit-nav"> 378<a accesskey="p" href="../statistics.html"><img src="../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../statistics.html"><img src="../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../index.html"><img src="../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="bivariate_statistics.html"><img src="../../../../../doc/src/images/next.png" alt="Next"></a> 379</div> 380</body> 381</html> 382