• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright Paul A. Bristow 2007, 2009, 2010
2 // Copyright John Maddock 2006
3 
4 // Use, modification and distribution are subject to the
5 // Boost Software License, Version 1.0.
6 // (See accompanying file LICENSE_1_0.txt
7 // or copy at http://www.boost.org/LICENSE_1_0.txt)
8 
9 // binomial_examples_quiz.cpp
10 
11 // Simple example of computing probabilities and quantiles for a binomial random variable
12 // representing the correct guesses on a multiple-choice test.
13 
14 // source http://www.stat.wvu.edu/SRS/Modules/Binomial/test.html
15 
16 //[binomial_quiz_example1
17 /*`
18 A multiple choice test has four possible answers to each of 16 questions.
19 A student guesses the answer to each question,
20 so the probability of getting a correct answer on any given question is
21 one in four, a quarter, 1/4, 25% or fraction 0.25.
22 The conditions of the binomial experiment are assumed to be met:
23 n = 16 questions constitute the trials;
24 each question results in one of two possible outcomes (correct or incorrect);
25 the probability of being correct is 0.25 and is constant if no knowledge about the subject is assumed;
26 the questions are answered independently if the student's answer to a question
27 in no way influences his/her answer to another question.
28 
29 First, we need to be able to use the binomial distribution constructor
30 (and some std input/output, of course).
31 */
32 
33 #include <boost/math/distributions/binomial.hpp>
34   using boost::math::binomial;
35 
36 #include <iostream>
37   using std::cout; using std::endl;
38   using std::ios; using std::flush; using std::left; using std::right; using std::fixed;
39 #include <iomanip>
40   using std::setw; using std::setprecision;
41 #include <exception>
42 
43 
44 
45 //][/binomial_quiz_example1]
46 
main()47 int main()
48 {
49   try
50   {
51   cout << "Binomial distribution example - guessing in a quiz." << endl;
52 //[binomial_quiz_example2
53 /*`
54 The number of correct answers, X, is distributed as a binomial random variable
55 with binomial distribution parameters: questions n and success fraction probability p.
56 So we construct a binomial distribution:
57 */
58   int questions = 16; // All the questions in the quiz.
59   int answers = 4; // Possible answers to each question.
60   double success_fraction = 1. / answers; // If a random guess, p = 1/4 = 0.25.
61   binomial quiz(questions, success_fraction);
62 /*`
63 and display the distribution parameters we used thus:
64 */
65   cout << "In a quiz with " << quiz.trials()
66     << " questions and with a probability of guessing right of "
67     << quiz.success_fraction() * 100 << " %"
68     << " or 1 in " << static_cast<int>(1. / quiz.success_fraction()) << endl;
69 /*`
70 Show a few probabilities of just guessing:
71 */
72   cout << "Probability of getting none right is " << pdf(quiz, 0) << endl; // 0.010023
73   cout << "Probability of getting exactly one right is " << pdf(quiz, 1) << endl;
74   cout << "Probability of getting exactly two right is " << pdf(quiz, 2) << endl;
75   int pass_score = 11;
76   cout << "Probability of getting exactly " << pass_score << " answers right by chance is "
77     << pdf(quiz, pass_score) << endl;
78   cout << "Probability of getting all " << questions << " answers right by chance is "
79     << pdf(quiz, questions) << endl;
80 /*`
81 [pre
82 Probability of getting none right is 0.0100226
83 Probability of getting exactly one right is 0.0534538
84 Probability of getting exactly two right is 0.133635
85 Probability of getting exactly 11 right is 0.000247132
86 Probability of getting exactly all 16 answers right by chance is 2.32831e-010
87 ]
88 These don't give any encouragement to guessers!
89 
90 We can tabulate the 'getting exactly right' ( == ) probabilities thus:
91 */
92   cout << "\n" "Guessed Probability" << right << endl;
93   for (int successes = 0; successes <= questions; successes++)
94   {
95     double probability = pdf(quiz, successes);
96     cout << setw(2) << successes << "      " << probability << endl;
97   }
98   cout << endl;
99 /*`
100 [pre
101 Guessed Probability
102  0      0.0100226
103  1      0.0534538
104  2      0.133635
105  3      0.207876
106  4      0.225199
107  5      0.180159
108  6      0.110097
109  7      0.0524273
110  8      0.0196602
111  9      0.00582526
112 10      0.00135923
113 11      0.000247132
114 12      3.43239e-005
115 13      3.5204e-006
116 14      2.51457e-007
117 15      1.11759e-008
118 16      2.32831e-010
119 ]
120 Then we can add the probabilities of some 'exactly right' like this:
121 */
122   cout << "Probability of getting none or one right is " << pdf(quiz, 0) + pdf(quiz, 1) << endl;
123 
124 /*`
125 [pre
126 Probability of getting none or one right is 0.0634764
127 ]
128 But if more than a couple of scores are involved, it is more convenient (and may be more accurate)
129 to use the Cumulative Distribution Function (cdf) instead:
130 */
131   cout << "Probability of getting none or one right is " << cdf(quiz, 1) << endl;
132 /*`
133 [pre
134 Probability of getting none or one right is 0.0634764
135 ]
136 Since the cdf is inclusive, we can get the probability of getting up to 10 right ( <= )
137 */
138   cout << "Probability of getting <= 10 right (to fail) is " << cdf(quiz, 10) << endl;
139 /*`
140 [pre
141 Probability of getting <= 10 right (to fail) is 0.999715
142 ]
143 To get the probability of getting 11 or more right (to pass),
144 it is tempting to use ``1 - cdf(quiz, 10)`` to get the probability of > 10
145 */
146   cout << "Probability of getting > 10 right (to pass) is " << 1 - cdf(quiz, 10) << endl;
147 /*`
148 [pre
149 Probability of getting > 10 right (to pass) is 0.000285239
150 ]
151 But this should be resisted in favor of using the __complements function (see __why_complements).
152 */
153   cout << "Probability of getting > 10 right (to pass) is " << cdf(complement(quiz, 10)) << endl;
154 /*`
155 [pre
156 Probability of getting > 10 right (to pass) is 0.000285239
157 ]
158 And we can check that these two, <= 10 and > 10,  add up to unity.
159 */
160 BOOST_ASSERT((cdf(quiz, 10) + cdf(complement(quiz, 10))) == 1.);
161 /*`
162 If we want a < rather than a <= test, because the CDF is inclusive, we must subtract one from the score.
163 */
164   cout << "Probability of getting less than " << pass_score
165     << " (< " << pass_score << ") answers right by guessing is "
166     << cdf(quiz, pass_score -1) << endl;
167 /*`
168 [pre
169 Probability of getting less than 11 (< 11) answers right by guessing is 0.999715
170 ]
171 and similarly to get a >= rather than a > test
172 we also need to subtract one from the score (and can again check the sum is unity).
173 This is because if the cdf is /inclusive/,
174 then its complement must be /exclusive/ otherwise there would be one possible
175 outcome counted twice!
176 */
177   cout << "Probability of getting at least " << pass_score
178     << "(>= " << pass_score << ") answers right by guessing is "
179     << cdf(complement(quiz, pass_score-1))
180     << ", only 1 in " << 1/cdf(complement(quiz, pass_score-1)) << endl;
181 
182   BOOST_ASSERT((cdf(quiz, pass_score -1) + cdf(complement(quiz, pass_score-1))) == 1);
183 
184 /*`
185 [pre
186 Probability of getting at least 11 (>= 11) answers right by guessing is 0.000285239, only 1 in 3505.83
187 ]
188 Finally we can tabulate some probabilities:
189 */
190   cout << "\n" "At most (<=)""\n""Guessed OK   Probability" << right << endl;
191   for (int score = 0; score <= questions; score++)
192   {
193     cout << setw(2) << score << "           " << setprecision(10)
194       << cdf(quiz, score) << endl;
195   }
196   cout << endl;
197 /*`
198 [pre
199 At most (<=)
200 Guessed OK   Probability
201  0           0.01002259576
202  1           0.0634764398
203  2           0.1971110499
204  3           0.4049871101
205  4           0.6301861752
206  5           0.8103454274
207  6           0.9204427481
208  7           0.9728700437
209  8           0.9925302796
210  9           0.9983555346
211 10           0.9997147608
212 11           0.9999618928
213 12           0.9999962167
214 13           0.9999997371
215 14           0.9999999886
216 15           0.9999999998
217 16           1
218 ]
219 */
220   cout << "\n" "At least (>)""\n""Guessed OK   Probability" << right << endl;
221   for (int score = 0; score <= questions; score++)
222   {
223     cout << setw(2) << score << "           "  << setprecision(10)
224       << cdf(complement(quiz, score)) << endl;
225   }
226 /*`
227 [pre
228 At least (>)
229 Guessed OK   Probability
230  0           0.9899774042
231  1           0.9365235602
232  2           0.8028889501
233  3           0.5950128899
234  4           0.3698138248
235  5           0.1896545726
236  6           0.07955725188
237  7           0.02712995629
238  8           0.00746972044
239  9           0.001644465374
240 10           0.0002852391917
241 11           3.810715862e-005
242 12           3.783265129e-006
243 13           2.628657967e-007
244 14           1.140870154e-008
245 15           2.328306437e-010
246 16           0
247 ]
248 We now consider the probabilities of *ranges* of correct guesses.
249 
250 First, calculate the probability of getting a range of guesses right,
251 by adding the exact probabilities of each from low ... high.
252 */
253   int low = 3; // Getting at least 3 right.
254   int high = 5; // Getting as most 5 right.
255   double sum = 0.;
256   for (int i = low; i <= high; i++)
257   {
258     sum += pdf(quiz, i);
259   }
260   cout.precision(4);
261   cout << "Probability of getting between "
262     << low << " and " << high << " answers right by guessing is "
263     << sum  << endl; // 0.61323
264 /*`
265 [pre
266 Probability of getting between 3 and 5 answers right by guessing is 0.6132
267 ]
268 Or, usually better, we can use the difference of cdfs instead:
269 */
270   cout << "Probability of getting between " << low << " and " << high << " answers right by guessing is "
271     <<  cdf(quiz, high) - cdf(quiz, low - 1) << endl; // 0.61323
272 /*`
273 [pre
274 Probability of getting between 3 and 5 answers right by guessing is 0.6132
275 ]
276 And we can also try a few more combinations of high and low choices:
277 */
278   low = 1; high = 6;
279   cout << "Probability of getting between " << low << " and " << high << " answers right by guessing is "
280     <<  cdf(quiz, high) - cdf(quiz, low - 1) << endl; // 1 and 6 P= 0.91042
281   low = 1; high = 8;
282   cout << "Probability of getting between " << low << " and " << high << " answers right by guessing is "
283     <<  cdf(quiz, high) - cdf(quiz, low - 1) << endl; // 1 <= x 8 P = 0.9825
284   low = 4; high = 4;
285   cout << "Probability of getting between " << low << " and " << high << " answers right by guessing is "
286     <<  cdf(quiz, high) - cdf(quiz, low - 1) << endl; // 4 <= x 4 P = 0.22520
287 
288 /*`
289 [pre
290 Probability of getting between 1 and 6 answers right by guessing is 0.9104
291 Probability of getting between 1 and 8 answers right by guessing is 0.9825
292 Probability of getting between 4 and 4 answers right by guessing is 0.2252
293 ]
294 [h4 Using Binomial distribution moments]
295 Using moments of the distribution, we can say more about the spread of results from guessing.
296 */
297   cout << "By guessing, on average, one can expect to get " << mean(quiz) << " correct answers." << endl;
298   cout << "Standard deviation is " << standard_deviation(quiz) << endl;
299   cout << "So about 2/3 will lie within 1 standard deviation and get between "
300     <<  ceil(mean(quiz) - standard_deviation(quiz))  << " and "
301     << floor(mean(quiz) + standard_deviation(quiz)) << " correct." << endl;
302   cout << "Mode (the most frequent) is " << mode(quiz) << endl;
303   cout << "Skewness is " << skewness(quiz) << endl;
304 
305 /*`
306 [pre
307 By guessing, on average, one can expect to get 4 correct answers.
308 Standard deviation is 1.732
309 So about 2/3 will lie within 1 standard deviation and get between 3 and 5 correct.
310 Mode (the most frequent) is 4
311 Skewness is 0.2887
312 ]
313 [h4 Quantiles]
314 The quantiles (percentiles or percentage points) for a few probability levels:
315 */
316   cout << "Quartiles " << quantile(quiz, 0.25) << " to "
317     << quantile(complement(quiz, 0.25)) << endl; // Quartiles
318   cout << "1 standard deviation " << quantile(quiz, 0.33) << " to "
319     << quantile(quiz, 0.67) << endl; // 1 sd
320   cout << "Deciles " << quantile(quiz, 0.1)  << " to "
321     << quantile(complement(quiz, 0.1))<< endl; // Deciles
322   cout << "5 to 95% " << quantile(quiz, 0.05)  << " to "
323     << quantile(complement(quiz, 0.05))<< endl; // 5 to 95%
324   cout << "2.5 to 97.5% " << quantile(quiz, 0.025) << " to "
325     <<  quantile(complement(quiz, 0.025)) << endl; // 2.5 to 97.5%
326   cout << "2 to 98% " << quantile(quiz, 0.02)  << " to "
327     << quantile(complement(quiz, 0.02)) << endl; //  2 to 98%
328 
329   cout << "If guessing then percentiles 1 to 99% will get " << quantile(quiz, 0.01)
330     << " to " << quantile(complement(quiz, 0.01)) << " right." << endl;
331 /*`
332 Notice that these output integral values because the default policy is `integer_round_outwards`.
333 [pre
334 Quartiles 2 to 5
335 1 standard deviation 2 to 5
336 Deciles 1 to 6
337 5 to 95% 0 to 7
338 2.5 to 97.5% 0 to 8
339 2 to 98% 0 to 8
340 ]
341 */
342 
343 //] [/binomial_quiz_example2]
344 
345 //[discrete_quantile_real
346 /*`
347 Quantiles values are controlled by the __understand_dis_quant  quantile policy chosen.
348 The default is `integer_round_outwards`,
349 so the lower quantile is rounded down, and the upper quantile is rounded up.
350 
351 But we might believe that the real values tell us a little more - see __math_discrete.
352 
353 We could control the policy for *all* distributions by
354 
355   #define BOOST_MATH_DISCRETE_QUANTILE_POLICY real
356 
357   at the head of the program would make this policy apply
358 to this *one, and only*, translation unit.
359 
360 Or we can now create a (typedef for) policy that has discrete quantiles real
361 (here avoiding any 'using namespaces ...' statements):
362 */
363   using boost::math::policies::policy;
364   using boost::math::policies::discrete_quantile;
365   using boost::math::policies::real;
366   using boost::math::policies::integer_round_outwards; // Default.
367   typedef boost::math::policies::policy<discrete_quantile<real> > real_quantile_policy;
368 /*`
369 Add a custom binomial distribution called ``real_quantile_binomial`` that uses ``real_quantile_policy``
370 */
371   using boost::math::binomial_distribution;
372   typedef binomial_distribution<double, real_quantile_policy> real_quantile_binomial;
373 /*`
374 Construct an object of this custom distribution:
375 */
376   real_quantile_binomial quiz_real(questions, success_fraction);
377 /*`
378 And use this to show some quantiles - that now have real rather than integer values.
379 */
380   cout << "Quartiles " << quantile(quiz, 0.25) << " to "
381     << quantile(complement(quiz_real, 0.25)) << endl; // Quartiles 2 to 4.6212
382   cout << "1 standard deviation " << quantile(quiz_real, 0.33) << " to "
383     << quantile(quiz_real, 0.67) << endl; // 1 sd 2.6654 4.194
384   cout << "Deciles " << quantile(quiz_real, 0.1)  << " to "
385     << quantile(complement(quiz_real, 0.1))<< endl; // Deciles 1.3487 5.7583
386   cout << "5 to 95% " << quantile(quiz_real, 0.05)  << " to "
387     << quantile(complement(quiz_real, 0.05))<< endl; // 5 to 95% 0.83739 6.4559
388   cout << "2.5 to 97.5% " << quantile(quiz_real, 0.025) << " to "
389     <<  quantile(complement(quiz_real, 0.025)) << endl; // 2.5 to 97.5% 0.42806 7.0688
390   cout << "2 to 98% " << quantile(quiz_real, 0.02)  << " to "
391     << quantile(complement(quiz_real, 0.02)) << endl; //  2 to 98% 0.31311 7.7880
392 
393   cout << "If guessing, then percentiles 1 to 99% will get " << quantile(quiz_real, 0.01)
394     << " to " << quantile(complement(quiz_real, 0.01)) << " right." << endl;
395 /*`
396 [pre
397 Real Quantiles
398 Quartiles 2 to 4.621
399 1 standard deviation 2.665 to 4.194
400 Deciles 1.349 to 5.758
401 5 to 95% 0.8374 to 6.456
402 2.5 to 97.5% 0.4281 to 7.069
403 2 to 98% 0.3131 to 7.252
404 If guessing then percentiles 1 to 99% will get 0 to 7.788 right.
405 ]
406 */
407 
408 //] [/discrete_quantile_real]
409   }
410   catch(const std::exception& e)
411   { // Always useful to include try & catch blocks because
412     // default policies are to throw exceptions on arguments that cause
413     // errors like underflow, overflow.
414     // Lacking try & catch blocks, the program will abort without a message below,
415     // which may give some helpful clues as to the cause of the exception.
416     std::cout <<
417       "\n""Message from thrown exception was:\n   " << e.what() << std::endl;
418   }
419   return 0;
420 } // int main()
421 
422 
423 
424 /*
425 
426 Output is:
427 
428 BAutorun "i:\boost-06-05-03-1300\libs\math\test\Math_test\debug\binomial_quiz_example.exe"
429 Binomial distribution example - guessing in a quiz.
430 In a quiz with 16 questions and with a probability of guessing right of 25 % or 1 in 4
431 Probability of getting none right is 0.0100226
432 Probability of getting exactly one right is 0.0534538
433 Probability of getting exactly two right is 0.133635
434 Probability of getting exactly 11 answers right by chance is 0.000247132
435 Probability of getting all 16 answers right by chance is 2.32831e-010
436 Guessed Probability
437  0      0.0100226
438  1      0.0534538
439  2      0.133635
440  3      0.207876
441  4      0.225199
442  5      0.180159
443  6      0.110097
444  7      0.0524273
445  8      0.0196602
446  9      0.00582526
447 10      0.00135923
448 11      0.000247132
449 12      3.43239e-005
450 13      3.5204e-006
451 14      2.51457e-007
452 15      1.11759e-008
453 16      2.32831e-010
454 Probability of getting none or one right is 0.0634764
455 Probability of getting none or one right is 0.0634764
456 Probability of getting <= 10 right (to fail) is 0.999715
457 Probability of getting > 10 right (to pass) is 0.000285239
458 Probability of getting > 10 right (to pass) is 0.000285239
459 Probability of getting less than 11 (< 11) answers right by guessing is 0.999715
460 Probability of getting at least 11(>= 11) answers right by guessing is 0.000285239, only 1 in 3505.83
461 At most (<=)
462 Guessed OK   Probability
463  0           0.01002259576
464  1           0.0634764398
465  2           0.1971110499
466  3           0.4049871101
467  4           0.6301861752
468  5           0.8103454274
469  6           0.9204427481
470  7           0.9728700437
471  8           0.9925302796
472  9           0.9983555346
473 10           0.9997147608
474 11           0.9999618928
475 12           0.9999962167
476 13           0.9999997371
477 14           0.9999999886
478 15           0.9999999998
479 16           1
480 At least (>)
481 Guessed OK   Probability
482  0           0.9899774042
483  1           0.9365235602
484  2           0.8028889501
485  3           0.5950128899
486  4           0.3698138248
487  5           0.1896545726
488  6           0.07955725188
489  7           0.02712995629
490  8           0.00746972044
491  9           0.001644465374
492 10           0.0002852391917
493 11           3.810715862e-005
494 12           3.783265129e-006
495 13           2.628657967e-007
496 14           1.140870154e-008
497 15           2.328306437e-010
498 16           0
499 Probability of getting between 3 and 5 answers right by guessing is 0.6132
500 Probability of getting between 3 and 5 answers right by guessing is 0.6132
501 Probability of getting between 1 and 6 answers right by guessing is 0.9104
502 Probability of getting between 1 and 8 answers right by guessing is 0.9825
503 Probability of getting between 4 and 4 answers right by guessing is 0.2252
504 By guessing, on average, one can expect to get 4 correct answers.
505 Standard deviation is 1.732
506 So about 2/3 will lie within 1 standard deviation and get between 3 and 5 correct.
507 Mode (the most frequent) is 4
508 Skewness is 0.2887
509 Quartiles 2 to 5
510 1 standard deviation 2 to 5
511 Deciles 1 to 6
512 5 to 95% 0 to 7
513 2.5 to 97.5% 0 to 8
514 2 to 98% 0 to 8
515 If guessing then percentiles 1 to 99% will get 0 to 8 right.
516 Quartiles 2 to 4.621
517 1 standard deviation 2.665 to 4.194
518 Deciles 1.349 to 5.758
519 5 to 95% 0.8374 to 6.456
520 2.5 to 97.5% 0.4281 to 7.069
521 2 to 98% 0.3131 to 7.252
522 If guessing, then percentiles 1 to 99% will get 0 to 7.788 right.
523 
524 */
525 
526