1 // Copyright John Maddock 2006
2 // Copyright Paul A. Bristow 2007, 2010
3
4 // Use, modification and distribution are subject to the
5 // Boost Software License, Version 1.0.
6 // (See accompanying file LICENSE_1_0.txt
7 // or copy at http://www.boost.org/LICENSE_1_0.txt)
8
9 #ifdef _MSC_VER
10 # pragma warning(disable: 4512) // assignment operator could not be generated.
11 # pragma warning(disable: 4510) // default constructor could not be generated.
12 # pragma warning(disable: 4610) // can never be instantiated - user defined constructor required.
13 #endif
14
15 #include <boost/math/distributions/students_t.hpp>
16
17 // avoid "using namespace std;" and "using namespace boost::math;"
18 // to avoid potential ambiguity with names in std random.
19 #include <iostream>
20 using std::cout; using std::endl;
21 using std::left; using std::fixed; using std::right; using std::scientific;
22 #include <iomanip>
23 using std::setw;
24 using std::setprecision;
25
confidence_limits_on_mean(double Sm,double Sd,unsigned Sn)26 void confidence_limits_on_mean(double Sm, double Sd, unsigned Sn)
27 {
28 //
29 // Sm = Sample Mean.
30 // Sd = Sample Standard Deviation.
31 // Sn = Sample Size.
32 //
33 // Calculate confidence intervals for the mean.
34 // For example if we set the confidence limit to
35 // 0.95, we know that if we repeat the sampling
36 // 100 times, then we expect that the true mean
37 // will be between out limits on 95 occasions.
38 // Note: this is not the same as saying a 95%
39 // confidence interval means that there is a 95%
40 // probability that the interval contains the true mean.
41 // The interval computed from a given sample either
42 // contains the true mean or it does not.
43 // See http://www.itl.nist.gov/div898/handbook/eda/section3/eda352.htm
44
45 using boost::math::students_t;
46
47 // Print out general info:
48 cout <<
49 "__________________________________\n"
50 "2-Sided Confidence Limits For Mean\n"
51 "__________________________________\n\n";
52 cout << setprecision(7);
53 cout << setw(40) << left << "Number of Observations" << "= " << Sn << "\n";
54 cout << setw(40) << left << "Mean" << "= " << Sm << "\n";
55 cout << setw(40) << left << "Standard Deviation" << "= " << Sd << "\n";
56 //
57 // Define a table of significance/risk levels:
58 //
59 double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 };
60 //
61 // Start by declaring the distribution we'll need:
62 //
63 students_t dist(Sn - 1);
64 //
65 // Print table header:
66 //
67 cout << "\n\n"
68 "_______________________________________________________________\n"
69 "Confidence T Interval Lower Upper\n"
70 " Value (%) Value Width Limit Limit\n"
71 "_______________________________________________________________\n";
72 //
73 // Now print out the data for the table rows.
74 //
75 for(unsigned i = 0; i < sizeof(alpha)/sizeof(alpha[0]); ++i)
76 {
77 // Confidence value:
78 cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]);
79 // calculate T:
80 double T = quantile(complement(dist, alpha[i] / 2));
81 // Print T:
82 cout << fixed << setprecision(3) << setw(10) << right << T;
83 // Calculate width of interval (one sided):
84 double w = T * Sd / sqrt(double(Sn));
85 // Print width:
86 if(w < 0.01)
87 cout << scientific << setprecision(3) << setw(17) << right << w;
88 else
89 cout << fixed << setprecision(3) << setw(17) << right << w;
90 // Print Limits:
91 cout << fixed << setprecision(5) << setw(15) << right << Sm - w;
92 cout << fixed << setprecision(5) << setw(15) << right << Sm + w << endl;
93 }
94 cout << endl;
95 } // void confidence_limits_on_mean
96
single_sample_t_test(double M,double Sm,double Sd,unsigned Sn,double alpha)97 void single_sample_t_test(double M, double Sm, double Sd, unsigned Sn, double alpha)
98 {
99 //
100 // M = true mean.
101 // Sm = Sample Mean.
102 // Sd = Sample Standard Deviation.
103 // Sn = Sample Size.
104 // alpha = Significance Level.
105 //
106 // A Students t test applied to a single set of data.
107 // We are testing the null hypothesis that the true
108 // mean of the sample is M, and that any variation is down
109 // to chance. We can also test the alternative hypothesis
110 // that any difference is not down to chance.
111 // See http://www.itl.nist.gov/div898/handbook/eda/section3/eda352.htm
112
113 using boost::math::students_t;
114
115 // Print header:
116 cout <<
117 "__________________________________\n"
118 "Student t test for a single sample\n"
119 "__________________________________\n\n";
120 cout << setprecision(5);
121 cout << setw(55) << left << "Number of Observations" << "= " << Sn << "\n";
122 cout << setw(55) << left << "Sample Mean" << "= " << Sm << "\n";
123 cout << setw(55) << left << "Sample Standard Deviation" << "= " << Sd << "\n";
124 cout << setw(55) << left << "Expected True Mean" << "= " << M << "\n\n";
125 //
126 // Now we can calculate and output some stats:
127 //
128 // Difference in means:
129 double diff = Sm - M;
130 cout << setw(55) << left << "Sample Mean - Expected Test Mean" << "= " << diff << "\n";
131 // Degrees of freedom:
132 unsigned v = Sn - 1;
133 cout << setw(55) << left << "Degrees of Freedom" << "= " << v << "\n";
134 // t-statistic:
135 double t_stat = diff * sqrt(double(Sn)) / Sd;
136 cout << setw(55) << left << "T Statistic" << "= " << t_stat << "\n";
137 //
138 // Finally define our distribution, and get the probability:
139 //
140 students_t dist(v);
141 double q = cdf(complement(dist, fabs(t_stat)));
142 cout << setw(55) << left << "Probability that difference is due to chance" << "= "
143 << setprecision(3) << scientific << 2 * q << "\n\n";
144 //
145 // Finally print out results of alternative hypothesis:
146 //
147 cout << setw(55) << left <<
148 "Results for Alternative Hypothesis and alpha" << "= "
149 << setprecision(4) << fixed << alpha << "\n\n";
150 cout << "Alternative Hypothesis Conclusion\n";
151 cout << "Mean != " << setprecision(3) << fixed << M << " ";
152 if(q < alpha / 2)
153 cout << "NOT REJECTED\n";
154 else
155 cout << "REJECTED\n";
156 cout << "Mean < " << setprecision(3) << fixed << M << " ";
157 if(cdf(complement(dist, t_stat)) > alpha)
158 cout << "NOT REJECTED\n";
159 else
160 cout << "REJECTED\n";
161 cout << "Mean > " << setprecision(3) << fixed << M << " ";
162 if(cdf(dist, t_stat) > alpha)
163 cout << "NOT REJECTED\n";
164 else
165 cout << "REJECTED\n";
166 cout << endl << endl;
167 } // void single_sample_t_test(
168
single_sample_find_df(double M,double Sm,double Sd)169 void single_sample_find_df(double M, double Sm, double Sd)
170 {
171 //
172 // M = true mean.
173 // Sm = Sample Mean.
174 // Sd = Sample Standard Deviation.
175 //
176
177 using boost::math::students_t;
178
179 // Print out general info:
180 cout <<
181 "_____________________________________________________________\n"
182 "Estimated sample sizes required for various confidence levels\n"
183 "_____________________________________________________________\n\n";
184 cout << setprecision(5);
185 cout << setw(40) << left << "True Mean" << "= " << M << "\n";
186 cout << setw(40) << left << "Sample Mean" << "= " << Sm << "\n";
187 cout << setw(40) << left << "Sample Standard Deviation" << "= " << Sd << "\n";
188 //
189 // Define a table of significance intervals:
190 //
191 double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 };
192 //
193 // Print table header:
194 //
195 cout << "\n\n"
196 "_______________________________________________________________\n"
197 "Confidence Estimated Estimated\n"
198 " Value (%) Sample Size Sample Size\n"
199 " (one sided test) (two sided test)\n"
200 "_______________________________________________________________\n";
201 //
202 // Now print out the data for the table rows.
203 //
204 for(unsigned i = 1; i < sizeof(alpha)/sizeof(alpha[0]); ++i)
205 {
206 // Confidence value:
207 cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]);
208 // calculate df for single sided test:
209 double df = students_t::find_degrees_of_freedom(
210 fabs(M - Sm), alpha[i], alpha[i], Sd);
211 // convert to sample size, always one more than the degrees of freedom:
212 double size = ceil(df) + 1;
213 // Print size:
214 cout << fixed << setprecision(0) << setw(16) << right << size;
215 // calculate df for two sided test:
216 df = students_t::find_degrees_of_freedom(
217 fabs(M - Sm), alpha[i]/2, alpha[i], Sd);
218 // convert to sample size:
219 size = ceil(df) + 1;
220 // Print size:
221 cout << fixed << setprecision(0) << setw(16) << right << size << endl;
222 }
223 cout << endl;
224 } // void single_sample_find_df
225
main()226 int main()
227 {
228 //
229 // Run tests for Heat Flow Meter data
230 // see http://www.itl.nist.gov/div898/handbook/eda/section4/eda428.htm
231 // The data was collected while calibrating a heat flow meter
232 // against a known value.
233 //
234 confidence_limits_on_mean(9.261460, 0.2278881e-01, 195);
235 single_sample_t_test(5, 9.261460, 0.2278881e-01, 195, 0.05);
236 single_sample_find_df(5, 9.261460, 0.2278881e-01);
237
238 //
239 // Data for this example from:
240 // P.K.Hou, O. W. Lau & M.C. Wong, Analyst (1983) vol. 108, p 64.
241 // from Statistics for Analytical Chemistry, 3rd ed. (1994), pp 54-55
242 // J. C. Miller and J. N. Miller, Ellis Horwood ISBN 0 13 0309907
243 //
244 // Determination of mercury by cold-vapour atomic absorption,
245 // the following values were obtained fusing a trusted
246 // Standard Reference Material containing 38.9% mercury,
247 // which we assume is correct or 'true'.
248 //
249 confidence_limits_on_mean(37.8, 0.964365, 3);
250 // 95% test:
251 single_sample_t_test(38.9, 37.8, 0.964365, 3, 0.05);
252 // 90% test:
253 single_sample_t_test(38.9, 37.8, 0.964365, 3, 0.1);
254 // parameter estimate:
255 single_sample_find_df(38.9, 37.8, 0.964365);
256
257 return 0;
258 } // int main()
259
260 /*
261
262 Output:
263
264 ------ Rebuild All started: Project: students_t_single_sample, Configuration: Release Win32 ------
265 students_t_single_sample.cpp
266 Generating code
267 Finished generating code
268 students_t_single_sample.vcxproj -> J:\Cpp\MathToolkit\test\Math_test\Release\students_t_single_sample.exe
269 __________________________________
270 2-Sided Confidence Limits For Mean
271 __________________________________
272
273 Number of Observations = 195
274 Mean = 9.26146
275 Standard Deviation = 0.02278881
276
277
278 _______________________________________________________________
279 Confidence T Interval Lower Upper
280 Value (%) Value Width Limit Limit
281 _______________________________________________________________
282 50.000 0.676 1.103e-003 9.26036 9.26256
283 75.000 1.154 1.883e-003 9.25958 9.26334
284 90.000 1.653 2.697e-003 9.25876 9.26416
285 95.000 1.972 3.219e-003 9.25824 9.26468
286 99.000 2.601 4.245e-003 9.25721 9.26571
287 99.900 3.341 5.453e-003 9.25601 9.26691
288 99.990 3.973 6.484e-003 9.25498 9.26794
289 99.999 4.537 7.404e-003 9.25406 9.26886
290
291 __________________________________
292 Student t test for a single sample
293 __________________________________
294
295 Number of Observations = 195
296 Sample Mean = 9.26146
297 Sample Standard Deviation = 0.02279
298 Expected True Mean = 5.00000
299
300 Sample Mean - Expected Test Mean = 4.26146
301 Degrees of Freedom = 194
302 T Statistic = 2611.28380
303 Probability that difference is due to chance = 0.000e+000
304
305 Results for Alternative Hypothesis and alpha = 0.0500
306
307 Alternative Hypothesis Conclusion
308 Mean != 5.000 NOT REJECTED
309 Mean < 5.000 REJECTED
310 Mean > 5.000 NOT REJECTED
311
312
313 _____________________________________________________________
314 Estimated sample sizes required for various confidence levels
315 _____________________________________________________________
316
317 True Mean = 5.00000
318 Sample Mean = 9.26146
319 Sample Standard Deviation = 0.02279
320
321
322 _______________________________________________________________
323 Confidence Estimated Estimated
324 Value (%) Sample Size Sample Size
325 (one sided test) (two sided test)
326 _______________________________________________________________
327 75.000 2 2
328 90.000 2 2
329 95.000 2 2
330 99.000 2 2
331 99.900 3 3
332 99.990 3 3
333 99.999 4 4
334
335 __________________________________
336 2-Sided Confidence Limits For Mean
337 __________________________________
338
339 Number of Observations = 3
340 Mean = 37.8000000
341 Standard Deviation = 0.9643650
342
343
344 _______________________________________________________________
345 Confidence T Interval Lower Upper
346 Value (%) Value Width Limit Limit
347 _______________________________________________________________
348 50.000 0.816 0.455 37.34539 38.25461
349 75.000 1.604 0.893 36.90717 38.69283
350 90.000 2.920 1.626 36.17422 39.42578
351 95.000 4.303 2.396 35.40438 40.19562
352 99.000 9.925 5.526 32.27408 43.32592
353 99.900 31.599 17.594 20.20639 55.39361
354 99.990 99.992 55.673 -17.87346 93.47346
355 99.999 316.225 176.067 -138.26683 213.86683
356
357 __________________________________
358 Student t test for a single sample
359 __________________________________
360
361 Number of Observations = 3
362 Sample Mean = 37.80000
363 Sample Standard Deviation = 0.96437
364 Expected True Mean = 38.90000
365
366 Sample Mean - Expected Test Mean = -1.10000
367 Degrees of Freedom = 2
368 T Statistic = -1.97566
369 Probability that difference is due to chance = 1.869e-001
370
371 Results for Alternative Hypothesis and alpha = 0.0500
372
373 Alternative Hypothesis Conclusion
374 Mean != 38.900 REJECTED
375 Mean < 38.900 NOT REJECTED
376 Mean > 38.900 NOT REJECTED
377
378
379 __________________________________
380 Student t test for a single sample
381 __________________________________
382
383 Number of Observations = 3
384 Sample Mean = 37.80000
385 Sample Standard Deviation = 0.96437
386 Expected True Mean = 38.90000
387
388 Sample Mean - Expected Test Mean = -1.10000
389 Degrees of Freedom = 2
390 T Statistic = -1.97566
391 Probability that difference is due to chance = 1.869e-001
392
393 Results for Alternative Hypothesis and alpha = 0.1000
394
395 Alternative Hypothesis Conclusion
396 Mean != 38.900 REJECTED
397 Mean < 38.900 NOT REJECTED
398 Mean > 38.900 REJECTED
399
400
401 _____________________________________________________________
402 Estimated sample sizes required for various confidence levels
403 _____________________________________________________________
404
405 True Mean = 38.90000
406 Sample Mean = 37.80000
407 Sample Standard Deviation = 0.96437
408
409
410 _______________________________________________________________
411 Confidence Estimated Estimated
412 Value (%) Sample Size Sample Size
413 (one sided test) (two sided test)
414 _______________________________________________________________
415 75.000 3 4
416 90.000 7 9
417 95.000 11 13
418 99.000 20 22
419 99.900 35 37
420 99.990 50 53
421 99.999 66 68
422
423 */
424
425