1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #include <math.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include "igt_core.h"
30 #include "igt_stats.h"
31
32 #define U64_MAX ((uint64_t)~0ULL)
33
34 #define sorted_value(stats, i) (stats->is_float ? stats->sorted_f[i] : stats->sorted_u64[i])
35 #define unsorted_value(stats, i) (stats->is_float ? stats->values_f[i] : stats->values_u64[i])
36
37 /**
38 * SECTION:igt_stats
39 * @short_description: Tools for statistical analysis
40 * @title: Stats
41 * @include: igt.h
42 *
43 * Various tools to make sense of data.
44 *
45 * #igt_stats_t is a container of data samples. igt_stats_push() is used to add
46 * new samples and various results (mean, variance, standard deviation, ...)
47 * can then be retrieved.
48 *
49 * |[
50 * igt_stats_t stats;
51 *
52 * igt_stats_init(&stats, 8);
53 *
54 * igt_stats_push(&stats, 2);
55 * igt_stats_push(&stats, 4);
56 * igt_stats_push(&stats, 4);
57 * igt_stats_push(&stats, 4);
58 * igt_stats_push(&stats, 5);
59 * igt_stats_push(&stats, 5);
60 * igt_stats_push(&stats, 7);
61 * igt_stats_push(&stats, 9);
62 *
63 * printf("Mean: %lf\n", igt_stats_get_mean(&stats));
64 *
65 * igt_stats_fini(&stats);
66 * ]|
67 */
68
get_new_capacity(int need)69 static unsigned int get_new_capacity(int need)
70 {
71 unsigned int new_capacity;
72
73 /* taken from Python's list */
74 new_capacity = (need >> 6) + (need < 9 ? 3 : 6);
75 new_capacity += need;
76
77 return new_capacity;
78 }
79
igt_stats_ensure_capacity(igt_stats_t * stats,unsigned int n_additional_values)80 static void igt_stats_ensure_capacity(igt_stats_t *stats,
81 unsigned int n_additional_values)
82 {
83 unsigned int new_n_values = stats->n_values + n_additional_values;
84 unsigned int new_capacity;
85
86 if (new_n_values <= stats->capacity)
87 return;
88
89 new_capacity = get_new_capacity(new_n_values);
90 stats->values_u64 = realloc(stats->values_u64,
91 sizeof(*stats->values_u64) * new_capacity);
92 igt_assert(stats->values_u64);
93
94 stats->capacity = new_capacity;
95
96 free(stats->sorted_u64);
97 stats->sorted_u64 = NULL;
98 }
99
100 /**
101 * igt_stats_init:
102 * @stats: An #igt_stats_t instance
103 *
104 * Initializes an #igt_stats_t instance. igt_stats_fini() must be called once
105 * finished with @stats.
106 */
igt_stats_init(igt_stats_t * stats)107 void igt_stats_init(igt_stats_t *stats)
108 {
109 memset(stats, 0, sizeof(*stats));
110
111 igt_stats_ensure_capacity(stats, 128);
112
113 stats->min = U64_MAX;
114 stats->max = 0;
115 }
116
117 /**
118 * igt_stats_init_with_size:
119 * @stats: An #igt_stats_t instance
120 * @capacity: Number of data samples @stats can contain
121 *
122 * Like igt_stats_init() but with a size to avoid reallocating the underlying
123 * array(s) when pushing new values. Useful if we have a good idea of the
124 * number of data points we want @stats to hold.
125 *
126 * igt_stats_fini() must be called once finished with @stats.
127 */
igt_stats_init_with_size(igt_stats_t * stats,unsigned int capacity)128 void igt_stats_init_with_size(igt_stats_t *stats, unsigned int capacity)
129 {
130 memset(stats, 0, sizeof(*stats));
131
132 igt_stats_ensure_capacity(stats, capacity);
133
134 stats->min = U64_MAX;
135 stats->max = 0;
136 stats->range[0] = HUGE_VAL;
137 stats->range[1] = -HUGE_VAL;
138 }
139
140 /**
141 * igt_stats_fini:
142 * @stats: An #igt_stats_t instance
143 *
144 * Frees resources allocated in igt_stats_init().
145 */
igt_stats_fini(igt_stats_t * stats)146 void igt_stats_fini(igt_stats_t *stats)
147 {
148 free(stats->values_u64);
149 free(stats->sorted_u64);
150 }
151
152
153 /**
154 * igt_stats_is_population:
155 * @stats: An #igt_stats_t instance
156 *
157 * Returns: #true if @stats represents a population, #false if only a sample.
158 *
159 * See igt_stats_set_population() for more details.
160 */
igt_stats_is_population(igt_stats_t * stats)161 bool igt_stats_is_population(igt_stats_t *stats)
162 {
163 return stats->is_population;
164 }
165
166 /**
167 * igt_stats_set_population:
168 * @stats: An #igt_stats_t instance
169 * @full_population: Whether we're dealing with sample data or a full
170 * population
171 *
172 * In statistics, we usually deal with a subset of the full data (which may be
173 * a continuous or infinite set). Data analysis is then done on a sample of
174 * this population.
175 *
176 * This has some importance as only having a sample of the data leads to
177 * [biased estimators](https://en.wikipedia.org/wiki/Bias_of_an_estimator). We
178 * currently used the information given by this method to apply
179 * [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction)
180 * to the variance.
181 *
182 * Note that even if we manage to have an unbiased variance by multiplying
183 * a sample variance by the Bessel's correction, n/(n - 1), the standard
184 * deviation derived from the unbiased variance isn't itself unbiased.
185 * Statisticians talk about a "corrected" standard deviation.
186 *
187 * When giving #true to this function, the data set in @stats is considered a
188 * full population. It's considered a sample of a bigger population otherwise.
189 *
190 * When newly created, @stats defaults to holding sample data.
191 */
igt_stats_set_population(igt_stats_t * stats,bool full_population)192 void igt_stats_set_population(igt_stats_t *stats, bool full_population)
193 {
194 if (full_population == stats->is_population)
195 return;
196
197 stats->is_population = full_population;
198 stats->mean_variance_valid = false;
199 }
200
201 /**
202 * igt_stats_push:
203 * @stats: An #igt_stats_t instance
204 * @value: An integer value
205 *
206 * Adds a new value to the @stats dataset.
207 */
igt_stats_push(igt_stats_t * stats,uint64_t value)208 void igt_stats_push(igt_stats_t *stats, uint64_t value)
209 {
210 if (stats->is_float) {
211 igt_stats_push_float(stats, value);
212 return;
213 }
214
215 igt_stats_ensure_capacity(stats, 1);
216
217 stats->values_u64[stats->n_values++] = value;
218
219 stats->mean_variance_valid = false;
220 stats->sorted_array_valid = false;
221
222 if (value < stats->min)
223 stats->min = value;
224 if (value > stats->max)
225 stats->max = value;
226 }
227
228 /**
229 * igt_stats_push:
230 * @stats: An #igt_stats_t instance
231 * @value: An floating point
232 *
233 * Adds a new value to the @stats dataset and converts the igt_stats from
234 * an integer collection to a floating point one.
235 */
igt_stats_push_float(igt_stats_t * stats,double value)236 void igt_stats_push_float(igt_stats_t *stats, double value)
237 {
238 igt_stats_ensure_capacity(stats, 1);
239
240 if (!stats->is_float) {
241 int n;
242
243 for (n = 0; n < stats->n_values; n++)
244 stats->values_f[n] = stats->values_u64[n];
245
246 stats->is_float = true;
247 }
248
249 stats->values_f[stats->n_values++] = value;
250
251 stats->mean_variance_valid = false;
252 stats->sorted_array_valid = false;
253
254 if (value < stats->range[0])
255 stats->range[0] = value;
256 if (value > stats->range[1])
257 stats->range[1] = value;
258 }
259
260 /**
261 * igt_stats_push_array:
262 * @stats: An #igt_stats_t instance
263 * @values: (array length=n_values): A pointer to an array of data points
264 * @n_values: The number of data points to add
265 *
266 * Adds an array of values to the @stats dataset.
267 */
igt_stats_push_array(igt_stats_t * stats,const uint64_t * values,unsigned int n_values)268 void igt_stats_push_array(igt_stats_t *stats,
269 const uint64_t *values, unsigned int n_values)
270 {
271 unsigned int i;
272
273 igt_stats_ensure_capacity(stats, n_values);
274
275 for (i = 0; i < n_values; i++)
276 igt_stats_push(stats, values[i]);
277 }
278
279 /**
280 * igt_stats_get_min:
281 * @stats: An #igt_stats_t instance
282 *
283 * Retrieves the minimal value in @stats
284 */
igt_stats_get_min(igt_stats_t * stats)285 uint64_t igt_stats_get_min(igt_stats_t *stats)
286 {
287 igt_assert(!stats->is_float);
288 return stats->min;
289 }
290
291 /**
292 * igt_stats_get_max:
293 * @stats: An #igt_stats_t instance
294 *
295 * Retrieves the maximum value in @stats
296 */
igt_stats_get_max(igt_stats_t * stats)297 uint64_t igt_stats_get_max(igt_stats_t *stats)
298 {
299 igt_assert(!stats->is_float);
300 return stats->max;
301 }
302
303 /**
304 * igt_stats_get_range:
305 * @stats: An #igt_stats_t instance
306 *
307 * Retrieves the range of the values in @stats. The range is the difference
308 * between the highest and the lowest value.
309 *
310 * The range can be a deceiving characterization of the values, because there
311 * can be extreme minimal and maximum values that are just anomalies. Prefer
312 * the interquatile range (see igt_stats_get_iqr()) or an histogram.
313 */
igt_stats_get_range(igt_stats_t * stats)314 uint64_t igt_stats_get_range(igt_stats_t *stats)
315 {
316 return igt_stats_get_max(stats) - igt_stats_get_min(stats);
317 }
318
cmp_u64(const void * pa,const void * pb)319 static int cmp_u64(const void *pa, const void *pb)
320 {
321 const uint64_t *a = pa, *b = pb;
322
323 if (*a < *b)
324 return -1;
325 if (*a > *b)
326 return 1;
327 return 0;
328 }
329
cmp_f(const void * pa,const void * pb)330 static int cmp_f(const void *pa, const void *pb)
331 {
332 const double *a = pa, *b = pb;
333
334 if (*a < *b)
335 return -1;
336 if (*a > *b)
337 return 1;
338 return 0;
339 }
340
igt_stats_ensure_sorted_values(igt_stats_t * stats)341 static void igt_stats_ensure_sorted_values(igt_stats_t *stats)
342 {
343 if (stats->sorted_array_valid)
344 return;
345
346 if (!stats->sorted_u64) {
347 /*
348 * igt_stats_ensure_capacity() will free ->sorted when the
349 * capacity increases, which also correspond to an invalidation
350 * of the sorted array. We'll then reallocate it here on
351 * demand.
352 */
353 stats->sorted_u64 = calloc(stats->capacity,
354 sizeof(*stats->values_u64));
355 igt_assert(stats->sorted_u64);
356 }
357
358 memcpy(stats->sorted_u64, stats->values_u64,
359 sizeof(*stats->values_u64) * stats->n_values);
360
361 qsort(stats->sorted_u64, stats->n_values, sizeof(*stats->values_u64),
362 stats->is_float ? cmp_f : cmp_u64);
363
364 stats->sorted_array_valid = true;
365 }
366
367 /*
368 * We use Tukey's hinge for our quartiles determination.
369 * ends (end, lower_end) are exclusive.
370 */
371 static double
igt_stats_get_median_internal(igt_stats_t * stats,unsigned int start,unsigned int end,unsigned int * lower_end,unsigned int * upper_start)372 igt_stats_get_median_internal(igt_stats_t *stats,
373 unsigned int start, unsigned int end,
374 unsigned int *lower_end /* out */,
375 unsigned int *upper_start /* out */)
376 {
377 unsigned int mid, n_values = end - start;
378 double median;
379
380 igt_stats_ensure_sorted_values(stats);
381
382 /* odd number of data points */
383 if (n_values % 2 == 1) {
384 /* median is the value in the middle (actual datum) */
385 mid = start + n_values / 2;
386 median = sorted_value(stats, mid);
387
388 /* the two halves contain the median value */
389 if (lower_end)
390 *lower_end = mid + 1;
391 if (upper_start)
392 *upper_start = mid;
393
394 /* even number of data points */
395 } else {
396 /*
397 * The middle is in between two indexes, 'mid' points at the
398 * lower one. The median is then the average between those two
399 * values.
400 */
401 mid = start + n_values / 2 - 1;
402 median = (sorted_value(stats, mid) + sorted_value(stats, mid+1))/2.;
403
404 if (lower_end)
405 *lower_end = mid + 1;
406 if (upper_start)
407 *upper_start = mid + 1;
408 }
409
410 return median;
411 }
412
413 /**
414 * igt_stats_get_quartiles:
415 * @stats: An #igt_stats_t instance
416 * @q1: (out): lower or 25th quartile
417 * @q2: (out): median or 50th quartile
418 * @q3: (out): upper or 75th quartile
419 *
420 * Retrieves the [quartiles](https://en.wikipedia.org/wiki/Quartile) of the
421 * @stats dataset.
422 */
igt_stats_get_quartiles(igt_stats_t * stats,double * q1,double * q2,double * q3)423 void igt_stats_get_quartiles(igt_stats_t *stats,
424 double *q1, double *q2, double *q3)
425 {
426 unsigned int lower_end, upper_start;
427 double ret;
428
429 if (stats->n_values < 3) {
430 if (q1)
431 *q1 = 0.;
432 if (q2)
433 *q2 = 0.;
434 if (q3)
435 *q3 = 0.;
436 return;
437 }
438
439 ret = igt_stats_get_median_internal(stats, 0, stats->n_values,
440 &lower_end, &upper_start);
441 if (q2)
442 *q2 = ret;
443
444 ret = igt_stats_get_median_internal(stats, 0, lower_end, NULL, NULL);
445 if (q1)
446 *q1 = ret;
447
448 ret = igt_stats_get_median_internal(stats, upper_start, stats->n_values,
449 NULL, NULL);
450 if (q3)
451 *q3 = ret;
452 }
453
454 /**
455 * igt_stats_get_iqr:
456 * @stats: An #igt_stats_t instance
457 *
458 * Retrieves the
459 * [interquartile range](https://en.wikipedia.org/wiki/Interquartile_range)
460 * (IQR) of the @stats dataset.
461 */
igt_stats_get_iqr(igt_stats_t * stats)462 double igt_stats_get_iqr(igt_stats_t *stats)
463 {
464 double q1, q3;
465
466 igt_stats_get_quartiles(stats, &q1, NULL, &q3);
467 return (q3 - q1);
468 }
469
470 /**
471 * igt_stats_get_median:
472 * @stats: An #igt_stats_t instance
473 *
474 * Retrieves the median of the @stats dataset.
475 */
igt_stats_get_median(igt_stats_t * stats)476 double igt_stats_get_median(igt_stats_t *stats)
477 {
478 return igt_stats_get_median_internal(stats, 0, stats->n_values,
479 NULL, NULL);
480 }
481
482 /*
483 * Algorithm popularised by Knuth in:
484 *
485 * The Art of Computer Programming, volume 2: Seminumerical Algorithms,
486 * 3rd edn., p. 232. Boston: Addison-Wesley
487 *
488 * Source: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
489 */
igt_stats_knuth_mean_variance(igt_stats_t * stats)490 static void igt_stats_knuth_mean_variance(igt_stats_t *stats)
491 {
492 double mean = 0., m2 = 0.;
493 unsigned int i;
494
495 if (stats->mean_variance_valid)
496 return;
497
498 for (i = 0; i < stats->n_values; i++) {
499 double delta = unsorted_value(stats, i) - mean;
500
501 mean += delta / (i + 1);
502 m2 += delta * (unsorted_value(stats, i) - mean);
503 }
504
505 stats->mean = mean;
506 if (stats->n_values > 1 && !stats->is_population)
507 stats->variance = m2 / (stats->n_values - 1);
508 else
509 stats->variance = m2 / stats->n_values;
510 stats->mean_variance_valid = true;
511 }
512
513 /**
514 * igt_stats_get_mean:
515 * @stats: An #igt_stats_t instance
516 *
517 * Retrieves the mean of the @stats dataset.
518 */
igt_stats_get_mean(igt_stats_t * stats)519 double igt_stats_get_mean(igt_stats_t *stats)
520 {
521 igt_stats_knuth_mean_variance(stats);
522
523 return stats->mean;
524 }
525
526 /**
527 * igt_stats_get_variance:
528 * @stats: An #igt_stats_t instance
529 *
530 * Retrieves the variance of the @stats dataset.
531 */
igt_stats_get_variance(igt_stats_t * stats)532 double igt_stats_get_variance(igt_stats_t *stats)
533 {
534 igt_stats_knuth_mean_variance(stats);
535
536 return stats->variance;
537 }
538
539 /**
540 * igt_stats_get_std_deviation:
541 * @stats: An #igt_stats_t instance
542 *
543 * Retrieves the standard deviation of the @stats dataset.
544 */
igt_stats_get_std_deviation(igt_stats_t * stats)545 double igt_stats_get_std_deviation(igt_stats_t *stats)
546 {
547 igt_stats_knuth_mean_variance(stats);
548
549 return sqrt(stats->variance);
550 }
551
552 /**
553 * igt_stats_get_iqm:
554 * @stats: An #igt_stats_t instance
555 *
556 * Retrieves the
557 * [interquartile mean](https://en.wikipedia.org/wiki/Interquartile_mean) (IQM)
558 * of the @stats dataset.
559 *
560 * The interquartile mean is a "statistical measure of central tendency".
561 * It is a truncated mean that discards the lowest and highest 25% of values,
562 * and calculates the mean value of the remaining central values.
563 *
564 * It's useful to hide outliers in measurements (due to cold cache etc).
565 */
igt_stats_get_iqm(igt_stats_t * stats)566 double igt_stats_get_iqm(igt_stats_t *stats)
567 {
568 unsigned int q1, q3, i;
569 double mean;
570
571 igt_stats_ensure_sorted_values(stats);
572
573 q1 = (stats->n_values + 3) / 4;
574 q3 = 3 * stats->n_values / 4;
575
576 mean = 0;
577 for (i = 0; i <= q3 - q1; i++)
578 mean += (sorted_value(stats, q1 + i) - mean) / (i + 1);
579
580 if (stats->n_values % 4) {
581 double rem = .5 * (stats->n_values % 4) / 4;
582
583 q1 = (stats->n_values) / 4;
584 q3 = (3 * stats->n_values + 3) / 4;
585
586 mean += rem * (sorted_value(stats, q1) - mean) / i++;
587 mean += rem * (sorted_value(stats, q3) - mean) / i++;
588 }
589
590 return mean;
591 }
592
593 /**
594 * igt_stats_get_trimean:
595 * @stats: An #igt_stats_t instance
596 *
597 * Retrieves the [trimean](https://en.wikipedia.org/wiki/Trimean) of the @stats
598 * dataset.
599 *
600 * The trimean is a the most efficient 3-point L-estimator, even more
601 * robust than the median at estimating the average of a sample population.
602 */
igt_stats_get_trimean(igt_stats_t * stats)603 double igt_stats_get_trimean(igt_stats_t *stats)
604 {
605 double q1, q2, q3;
606 igt_stats_get_quartiles(stats, &q1, &q2, &q3);
607 return (q1 + 2*q2 + q3) / 4;
608 }
609
610 /**
611 * igt_mean_init:
612 * @m: tracking structure
613 *
614 * Initializes or resets @m.
615 */
igt_mean_init(struct igt_mean * m)616 void igt_mean_init(struct igt_mean *m)
617 {
618 memset(m, 0, sizeof(*m));
619 m->max = -HUGE_VAL;
620 m->min = HUGE_VAL;
621 }
622
623 /**
624 * igt_mean_add:
625 * @m: tracking structure
626 * @v: value
627 *
628 * Adds a new value @v to @m.
629 */
igt_mean_add(struct igt_mean * m,double v)630 void igt_mean_add(struct igt_mean *m, double v)
631 {
632 double delta = v - m->mean;
633 m->mean += delta / ++m->count;
634 m->sq += delta * (v - m->mean);
635 if (v < m->min)
636 m->min = v;
637 if (v > m->max)
638 m->max = v;
639 }
640
641 /**
642 * igt_mean_get:
643 * @m: tracking structure
644 *
645 * Computes the current mean of the samples tracked in @m.
646 */
igt_mean_get(struct igt_mean * m)647 double igt_mean_get(struct igt_mean *m)
648 {
649 return m->mean;
650 }
651
652 /**
653 * igt_mean_get_variance:
654 * @m: tracking structure
655 *
656 * Computes the current variance of the samples tracked in @m.
657 */
igt_mean_get_variance(struct igt_mean * m)658 double igt_mean_get_variance(struct igt_mean *m)
659 {
660 return m->sq / m->count;
661 }
662
663