1 // find_mean_and_sd_normal.cpp
2
3 // Copyright Paul A. Bristow 2007, 2010.
4
5 // Use, modification and distribution are subject to the
6 // Boost Software License, Version 1.0.
7 // (See accompanying file LICENSE_1_0.txt
8 // or copy at http://www.boost.org/LICENSE_1_0.txt)
9
10 // Example of finding mean or sd for normal distribution.
11
12 // Note that this file contains Quickbook mark-up as well as code
13 // and comments, don't change any of the special comment mark-ups!
14
15 //[normal_std
16 /*`
17 First we need some includes to access the normal distribution,
18 the algorithms to find location and scale
19 (and some std output of course).
20 */
21
22 #include <boost/math/distributions/normal.hpp> // for normal_distribution
23 using boost::math::normal; // typedef provides default type is double.
24 #include <boost/math/distributions/cauchy.hpp> // for cauchy_distribution
25 using boost::math::cauchy; // typedef provides default type is double.
26 #include <boost/math/distributions/find_location.hpp>
27 using boost::math::find_location;
28 #include <boost/math/distributions/find_scale.hpp>
29 using boost::math::find_scale;
30 using boost::math::complement;
31 using boost::math::policies::policy;
32
33 #include <iostream>
34 using std::cout; using std::endl; using std::left; using std::showpoint; using std::noshowpoint;
35 #include <iomanip>
36 using std::setw; using std::setprecision;
37 #include <limits>
38 using std::numeric_limits;
39 #include <stdexcept>
40
41 //] [/normal_std Quickbook]
42
main()43 int main()
44 {
45 cout << "Find_location (mean) and find_scale (standard deviation) examples." << endl;
46 try
47 {
48
49 //[normal_find_location_and_scale_eg
50
51 /*`
52 [h4 Using find_location and find_scale to meet dispensing and measurement specifications]
53
54 Consider an example from K Krishnamoorthy,
55 Handbook of Statistical Distributions with Applications,
56 ISBN 1-58488-635-8, (2006) p 126, example 10.3.7.
57
58 "A machine is set to pack 3 kg of ground beef per pack.
59 Over a long period of time it is found that the average packed was 3 kg
60 with a standard deviation of 0.1 kg.
61 Assume the packing is normally distributed."
62
63 We start by constructing a normal distribution with the given parameters:
64 */
65
66 double mean = 3.; // kg
67 double standard_deviation = 0.1; // kg
68 normal packs(mean, standard_deviation);
69 /*`We can then find the fraction (or %) of packages that weigh more than 3.1 kg.
70 */
71
72 double max_weight = 3.1; // kg
73 cout << "Percentage of packs > " << max_weight << " is "
74 << cdf(complement(packs, max_weight)) * 100. << endl; // P(X > 3.1)
75
76 /*`We might want to ensure that 95% of packs are over a minimum weight specification,
77 then we want the value of the mean such that P(X < 2.9) = 0.05.
78
79 Using the mean of 3 kg, we can estimate
80 the fraction of packs that fail to meet the specification of 2.9 kg.
81 */
82
83 double minimum_weight = 2.9;
84 cout <<"Fraction of packs <= " << minimum_weight << " with a mean of " << mean
85 << " is " << cdf(complement(packs, minimum_weight)) << endl;
86 // fraction of packs <= 2.9 with a mean of 3 is 0.841345
87
88 /*`This is 0.84 - more than the target fraction of 0.95.
89 If we want 95% to be over the minimum weight,
90 what should we set the mean weight to be?
91
92 Using the KK StatCalc program supplied with the book and the method given on page 126 gives 3.06449.
93
94 We can confirm this by constructing a new distribution which we call 'xpacks'
95 with a safety margin mean of 3.06449 thus:
96 */
97 double over_mean = 3.06449;
98 normal xpacks(over_mean, standard_deviation);
99 cout << "Fraction of packs >= " << minimum_weight
100 << " with a mean of " << xpacks.mean()
101 << " is " << cdf(complement(xpacks, minimum_weight)) << endl;
102 // fraction of packs >= 2.9 with a mean of 3.06449 is 0.950005
103
104 /*`Using this Math Toolkit, we can calculate the required mean directly thus:
105 */
106 double under_fraction = 0.05; // so 95% are above the minimum weight mean - sd = 2.9
107 double low_limit = standard_deviation;
108 double offset = mean - low_limit - quantile(packs, under_fraction);
109 double nominal_mean = mean + offset;
110 // mean + (mean - low_limit - quantile(packs, under_fraction));
111
112 normal nominal_packs(nominal_mean, standard_deviation);
113 cout << "Setting the packer to " << nominal_mean << " will mean that "
114 << "fraction of packs >= " << minimum_weight
115 << " is " << cdf(complement(nominal_packs, minimum_weight)) << endl;
116 // Setting the packer to 3.06449 will mean that fraction of packs >= 2.9 is 0.95
117
118 /*`
119 This calculation is generalized as the free function called `find_location`,
120 see __algorithms.
121
122 To use this we will need to
123 */
124
125 #include <boost/math/distributions/find_location.hpp>
126 using boost::math::find_location;
127 /*`and then use find_location function to find safe_mean,
128 & construct a new normal distribution called 'goodpacks'.
129 */
130 double safe_mean = find_location<normal>(minimum_weight, under_fraction, standard_deviation);
131 normal good_packs(safe_mean, standard_deviation);
132 /*`with the same confirmation as before:
133 */
134 cout << "Setting the packer to " << nominal_mean << " will mean that "
135 << "fraction of packs >= " << minimum_weight
136 << " is " << cdf(complement(good_packs, minimum_weight)) << endl;
137 // Setting the packer to 3.06449 will mean that fraction of packs >= 2.9 is 0.95
138
139 /*`
140 [h4 Using Cauchy-Lorentz instead of normal distribution]
141
142 After examining the weight distribution of a large number of packs, we might decide that,
143 after all, the assumption of a normal distribution is not really justified.
144 We might find that the fit is better to a __cauchy_distrib.
145 This distribution has wider 'wings', so that whereas most of the values
146 are closer to the mean than the normal, there are also more values than 'normal'
147 that lie further from the mean than the normal.
148
149 This might happen because a larger than normal lump of meat is either included or excluded.
150
151 We first create a __cauchy_distrib with the original mean and standard deviation,
152 and estimate the fraction that lie below our minimum weight specification.
153 */
154
155 cauchy cpacks(mean, standard_deviation);
156 cout << "Cauchy Setting the packer to " << mean << " will mean that "
157 << "fraction of packs >= " << minimum_weight
158 << " is " << cdf(complement(cpacks, minimum_weight)) << endl;
159 // Cauchy Setting the packer to 3 will mean that fraction of packs >= 2.9 is 0.75
160
161 /*`Note that far fewer of the packs meet the specification, only 75% instead of 95%.
162 Now we can repeat the find_location, using the cauchy distribution as template parameter,
163 in place of the normal used above.
164 */
165
166 double lc = find_location<cauchy>(minimum_weight, under_fraction, standard_deviation);
167 cout << "find_location<cauchy>(minimum_weight, over fraction, standard_deviation); " << lc << endl;
168 // find_location<cauchy>(minimum_weight, over fraction, packs.standard_deviation()); 3.53138
169 /*`Note that the safe_mean setting needs to be much higher, 3.53138 instead of 3.06449,
170 so we will make rather less profit.
171
172 And again confirm that the fraction meeting specification is as expected.
173 */
174 cauchy goodcpacks(lc, standard_deviation);
175 cout << "Cauchy Setting the packer to " << lc << " will mean that "
176 << "fraction of packs >= " << minimum_weight
177 << " is " << cdf(complement(goodcpacks, minimum_weight)) << endl;
178 // Cauchy Setting the packer to 3.53138 will mean that fraction of packs >= 2.9 is 0.95
179
180 /*`Finally we could estimate the effect of a much tighter specification,
181 that 99% of packs met the specification.
182 */
183
184 cout << "Cauchy Setting the packer to "
185 << find_location<cauchy>(minimum_weight, 0.99, standard_deviation)
186 << " will mean that "
187 << "fraction of packs >= " << minimum_weight
188 << " is " << cdf(complement(goodcpacks, minimum_weight)) << endl;
189
190 /*`Setting the packer to 3.13263 will mean that fraction of packs >= 2.9 is 0.99,
191 but will more than double the mean loss from 0.0644 to 0.133 kg per pack.
192
193 Of course, this calculation is not limited to packs of meat, it applies to dispensing anything,
194 and it also applies to a 'virtual' material like any measurement.
195
196 The only caveat is that the calculation assumes that the standard deviation (scale) is known with
197 a reasonably low uncertainty, something that is not so easy to ensure in practice.
198 And that the distribution is well defined, __normal_distrib or __cauchy_distrib, or some other.
199
200 If one is simply dispensing a very large number of packs,
201 then it may be feasible to measure the weight of hundreds or thousands of packs.
202 With a healthy 'degrees of freedom', the confidence intervals for the standard deviation
203 are not too wide, typically about + and - 10% for hundreds of observations.
204
205 For other applications, where it is more difficult or expensive to make many observations,
206 the confidence intervals are depressingly wide.
207
208 See [link math_toolkit.stat_tut.weg.cs_eg.chi_sq_intervals Confidence Intervals on the standard deviation]
209 for a worked example
210 [@../../example/chi_square_std_dev_test.cpp chi_square_std_dev_test.cpp]
211 of estimating these intervals.
212
213
214 [h4 Changing the scale or standard deviation]
215
216 Alternatively, we could invest in a better (more precise) packer
217 (or measuring device) with a lower standard deviation, or scale.
218
219 This might cost more, but would reduce the amount we have to 'give away'
220 in order to meet the specification.
221
222 To estimate how much better (how much smaller standard deviation) it would have to be,
223 we need to get the 5% quantile to be located at the under_weight limit, 2.9
224 */
225 double p = 0.05; // wanted p th quantile.
226 cout << "Quantile of " << p << " = " << quantile(packs, p)
227 << ", mean = " << packs.mean() << ", sd = " << packs.standard_deviation() << endl;
228 /*`
229 Quantile of 0.05 = 2.83551, mean = 3, sd = 0.1
230
231 With the current packer (mean = 3, sd = 0.1), the 5% quantile is at 2.8551 kg,
232 a little below our target of 2.9 kg.
233 So we know that the standard deviation is going to have to be smaller.
234
235 Let's start by guessing that it (now 0.1) needs to be halved, to a standard deviation of 0.05 kg.
236 */
237 normal pack05(mean, 0.05);
238 cout << "Quantile of " << p << " = " << quantile(pack05, p)
239 << ", mean = " << pack05.mean() << ", sd = " << pack05.standard_deviation() << endl;
240 // Quantile of 0.05 = 2.91776, mean = 3, sd = 0.05
241
242 cout <<"Fraction of packs >= " << minimum_weight << " with a mean of " << mean
243 << " and standard deviation of " << pack05.standard_deviation()
244 << " is " << cdf(complement(pack05, minimum_weight)) << endl;
245 // Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.05 is 0.97725
246 /*`
247 So 0.05 was quite a good guess, but we are a little over the 2.9 target,
248 so the standard deviation could be a tiny bit more. So we could do some
249 more guessing to get closer, say by increasing standard deviation to 0.06 kg,
250 constructing another new distribution called pack06.
251 */
252 normal pack06(mean, 0.06);
253 cout << "Quantile of " << p << " = " << quantile(pack06, p)
254 << ", mean = " << pack06.mean() << ", sd = " << pack06.standard_deviation() << endl;
255 // Quantile of 0.05 = 2.90131, mean = 3, sd = 0.06
256
257 cout <<"Fraction of packs >= " << minimum_weight << " with a mean of " << mean
258 << " and standard deviation of " << pack06.standard_deviation()
259 << " is " << cdf(complement(pack06, minimum_weight)) << endl;
260 // Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.06 is 0.95221
261 /*`
262 Now we are getting really close, but to do the job properly,
263 we might need to use root finding method, for example the tools provided,
264 and used elsewhere, in the Math Toolkit, see __root_finding_without_derivatives
265
266 But in this (normal) distribution case, we can and should be even smarter
267 and make a direct calculation.
268 */
269
270 /*`Our required limit is minimum_weight = 2.9 kg, often called the random variate z.
271 For a standard normal distribution, then probability p = N((minimum_weight - mean) / sd).
272
273 We want to find the standard deviation that would be required to meet this limit,
274 so that the p th quantile is located at z (minimum_weight).
275 In this case, the 0.05 (5%) quantile is at 2.9 kg pack weight, when the mean is 3 kg,
276 ensuring that 0.95 (95%) of packs are above the minimum weight.
277
278 Rearranging, we can directly calculate the required standard deviation:
279 */
280 normal N01; // standard normal distribution with mean zero and unit standard deviation.
281 p = 0.05;
282 double qp = quantile(N01, p);
283 double sd95 = (minimum_weight - mean) / qp;
284
285 cout << "For the "<< p << "th quantile to be located at "
286 << minimum_weight << ", would need a standard deviation of " << sd95 << endl;
287 // For the 0.05th quantile to be located at 2.9, would need a standard deviation of 0.0607957
288
289 /*`We can now construct a new (normal) distribution pack95 for the 'better' packer,
290 and check that our distribution will meet the specification.
291 */
292
293 normal pack95(mean, sd95);
294 cout <<"Fraction of packs >= " << minimum_weight << " with a mean of " << mean
295 << " and standard deviation of " << pack95.standard_deviation()
296 << " is " << cdf(complement(pack95, minimum_weight)) << endl;
297 // Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.0607957 is 0.95
298
299 /*`This calculation is generalized in the free function find_scale,
300 as shown below, giving the same standard deviation.
301 */
302 double ss = find_scale<normal>(minimum_weight, under_fraction, packs.mean());
303 cout << "find_scale<normal>(minimum_weight, under_fraction, packs.mean()); " << ss << endl;
304 // find_scale<normal>(minimum_weight, under_fraction, packs.mean()); 0.0607957
305
306 /*`If we had defined an over_fraction, or percentage that must pass specification
307 */
308 double over_fraction = 0.95;
309 /*`And (wrongly) written
310
311 double sso = find_scale<normal>(minimum_weight, over_fraction, packs.mean());
312
313 With the default policy, we would get a message like
314
315 [pre
316 Message from thrown exception was:
317 Error in function boost::math::find_scale<Dist, Policy>(double, double, double, Policy):
318 Computed scale (-0.060795683191176959) is <= 0! Was the complement intended?
319 ]
320
321 But this would return a *negative* standard deviation - obviously impossible.
322 The probability should be 1 - over_fraction, not over_fraction, thus:
323 */
324
325 double ss1o = find_scale<normal>(minimum_weight, 1 - over_fraction, packs.mean());
326 cout << "find_scale<normal>(minimum_weight, under_fraction, packs.mean()); " << ss1o << endl;
327 // find_scale<normal>(minimum_weight, under_fraction, packs.mean()); 0.0607957
328
329 /*`But notice that using '1 - over_fraction' - will lead to a
330 loss of accuracy, especially if over_fraction was close to unity. (See __why_complements).
331 In this (very common) case, we should instead use the __complements,
332 giving the most accurate result.
333 */
334
335 double ssc = find_scale<normal>(complement(minimum_weight, over_fraction, packs.mean()));
336 cout << "find_scale<normal>(complement(minimum_weight, over_fraction, packs.mean())); " << ssc << endl;
337 // find_scale<normal>(complement(minimum_weight, over_fraction, packs.mean())); 0.0607957
338
339 /*`Note that our guess of 0.06 was close to the accurate value of 0.060795683191176959.
340
341 We can again confirm our prediction thus:
342 */
343
344 normal pack95c(mean, ssc);
345 cout <<"Fraction of packs >= " << minimum_weight << " with a mean of " << mean
346 << " and standard deviation of " << pack95c.standard_deviation()
347 << " is " << cdf(complement(pack95c, minimum_weight)) << endl;
348 // Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.0607957 is 0.95
349
350 /*`Notice that these two deceptively simple questions:
351
352 * Do we over-fill to make sure we meet a minimum specification (or under-fill to avoid an overdose)?
353
354 and/or
355
356 * Do we measure better?
357
358 are actually extremely common.
359
360 The weight of beef might be replaced by a measurement of more or less anything,
361 from drug tablet content, Apollo landing rocket firing, X-ray treatment doses...
362
363 The scale can be variation in dispensing or uncertainty in measurement.
364 */
365 //] [/normal_find_location_and_scale_eg Quickbook end]
366
367 }
368 catch(const std::exception& e)
369 { // Always useful to include try & catch blocks because default policies
370 // are to throw exceptions on arguments that cause errors like underflow, overflow.
371 // Lacking try & catch blocks, the program will abort without a message below,
372 // which may give some helpful clues as to the cause of the exception.
373 cout <<
374 "\n""Message from thrown exception was:\n " << e.what() << endl;
375 }
376 return 0;
377 } // int main()
378
379
380 /*
381
382 Output is:
383
384 //[normal_find_location_and_scale_output
385
386 Find_location (mean) and find_scale (standard deviation) examples.
387 Percentage of packs > 3.1 is 15.8655
388 Fraction of packs <= 2.9 with a mean of 3 is 0.841345
389 Fraction of packs >= 2.9 with a mean of 3.06449 is 0.950005
390 Setting the packer to 3.06449 will mean that fraction of packs >= 2.9 is 0.95
391 Setting the packer to 3.06449 will mean that fraction of packs >= 2.9 is 0.95
392 Cauchy Setting the packer to 3 will mean that fraction of packs >= 2.9 is 0.75
393 find_location<cauchy>(minimum_weight, over fraction, standard_deviation); 3.53138
394 Cauchy Setting the packer to 3.53138 will mean that fraction of packs >= 2.9 is 0.95
395 Cauchy Setting the packer to -0.282052 will mean that fraction of packs >= 2.9 is 0.95
396 Quantile of 0.05 = 2.83551, mean = 3, sd = 0.1
397 Quantile of 0.05 = 2.91776, mean = 3, sd = 0.05
398 Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.05 is 0.97725
399 Quantile of 0.05 = 2.90131, mean = 3, sd = 0.06
400 Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.06 is 0.95221
401 For the 0.05th quantile to be located at 2.9, would need a standard deviation of 0.0607957
402 Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.0607957 is 0.95
403 find_scale<normal>(minimum_weight, under_fraction, packs.mean()); 0.0607957
404 find_scale<normal>(minimum_weight, under_fraction, packs.mean()); 0.0607957
405 find_scale<normal>(complement(minimum_weight, over_fraction, packs.mean())); 0.0607957
406 Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.0607957 is 0.95
407
408 //] [/normal_find_location_and_scale_eg_output]
409
410 */
411
412
413
414