1 // students_t_example3.cpp
2 // Copyright Paul A. Bristow 2006, 2007.
3
4 // Use, modification and distribution are subject to the
5 // Boost Software License, Version 1.0.
6 // (See accompanying file LICENSE_1_0.txt
7 // or copy at http://www.boost.org/LICENSE_1_0.txt)
8
9 // Example 3 of using Student's t.
10
11 // A general guide to Student's t is at
12 // http://en.wikipedia.org/wiki/Student's_t-test
13 // (and many other elementary and advanced statistics texts).
14 // It says:
15 // The t statistic was invented by William Sealy Gosset
16 // for cheaply monitoring the quality of beer brews.
17 // "Student" was his pen name.
18 // Gosset was statistician for Guinness brewery in Dublin, Ireland,
19 // hired due to Claude Guinness's innovative policy of recruiting the
20 // best graduates from Oxford and Cambridge for applying biochemistry
21 // and statistics to Guinness's industrial processes.
22 // Gosset published the t test in Biometrika in 1908,
23 // but was forced to use a pen name by his employer who regarded the fact
24 // that they were using statistics as a trade secret.
25 // In fact, Gosset's identity was unknown not only to fellow statisticians
26 // but to his employer - the company insisted on the pseudonym
27 // so that it could turn a blind eye to the breach of its rules.
28
// The Student's t distribution function is described at
30 // http://en.wikipedia.org/wiki/Student%27s_t_distribution
31
32 #include <boost/math/distributions/students_t.hpp>
33 using boost::math::students_t; // Probability of students_t(df, t).
34
35 #include <iostream>
36 using std::cout; using std::endl;
37 #include <iomanip>
38 using std::setprecision; using std::setw;
39 #include <cmath>
40 using std::sqrt;
41
42 // This example of a two-sided test is from:
43 // B. M. Smith & M. B. Griffiths, Analyst, 1982, 107, 253,
44 // from Statistics for Analytical Chemistry, 3rd ed. (1994), pp 58-59
45 // J. C. Miller and J. N. Miller, Ellis Horwood ISBN 0 13 0309907
46
// Concentrations of lead (ug/l, i.e. micrograms per litre) were determined
// by two different methods for each of four test portions.
// Because the concentration of each portion is significantly different,
// the values may NOT be pooled; instead the per-portion differences
// between the two methods are tested.
// (Called a 'paired test' by Miller and Miller
// because each portion analysed has a different concentration.)
53
54 // Portion Wet oxidation Direct Extraction
55 // 1 71 76
56 // 2 61 68
57 // 3 50 48
58 // 4 60 57
59
// Experiment dimensions: four test portions, each analysed by two methods.
const int portions = 4;
const int methods = 2;

// Measured lead concentrations (ug/l), one row per test portion:
//   column 0 = wet oxidation, column 1 = direct extraction.
// This is fixed reference data that is only ever read, so it is const.
const float data [portions][methods] = {
  {71, 76},
  {61, 68},
  {50, 48},
  {60, 57}
};

// Per-portion difference between the two methods (column 0 - column 1).
// Zero-initialized at start-up (static storage) and filled in by main().
float diffs[portions];
64
main()65 int main()
66 {
67 cout << "Example3 using Student's t function. " << endl;
68 float mean_diff = 0.f;
69 cout << "\n""Portion wet_oxidation Direct_extraction difference" << endl;
70 for (int portion = 0; portion < portions; portion++)
71 { // Echo data and differences.
72 diffs[portion] = data[portion][0] - data[portion][1];
73 mean_diff += diffs[portion];
74 cout << setw(4) << portion << ' ' << setw(14) << data[portion][0] << ' ' << setw(18)<< data[portion][1] << ' ' << setw(9) << diffs[portion] << endl;
75 }
76 mean_diff /= portions;
77 cout << "Mean difference = " << mean_diff << endl; // -1.75
78
79 float sd_diffs = 0.f;
80 for (int portion = 0; portion < portions; portion++)
81 { // Calculate standard deviation of differences.
82 sd_diffs +=(diffs[portion] - mean_diff) * (diffs[portion] - mean_diff);
83 }
84 int degrees_of_freedom = portions-1; // Use the n-1 formula.
85 sd_diffs /= degrees_of_freedom;
86 sd_diffs = sqrt(sd_diffs);
87 cout << "Standard deviation of differences = " << sd_diffs << endl; // 4.99166
88 // Standard deviation of differences = 4.99166
89 double t = mean_diff * sqrt(static_cast<double>(portions))/ sd_diffs; // -0.70117
90 cout << "Student's t = " << t << ", if " << degrees_of_freedom << " degrees of freedom." << endl;
91 // Student's t = -0.70117, if 3 degrees of freedom.
92 cout << "Probability of the means being different is "
93 << 2.F * cdf(students_t(degrees_of_freedom), t) << "."<< endl; // 0.266846 * 2 = 0.533692
94 // Double the probability because using a 'two-sided test' because
95 // mean for 'Wet oxidation' could be either
96 // greater OR LESS THAN for 'Direct extraction'.
97
98 return 0;
99 } // int main()
100
101 /*
102
103 Output is:
104
105 Example3 using Student's t function.
106 Portion wet_oxidation Direct_extraction difference
107 0 71 76 -5
108 1 61 68 -7
109 2 50 48 2
110 3 60 57 3
111 Mean difference = -1.75
112 Standard deviation of differences = 4.99166
113 Student's t = -0.70117, if 3 degrees of freedom.
114 Probability of the means being different is 0.533692.
115
116 */
117
118
119
120
121