From 1b8997b3f89f5c7632782d2e7f8509a0f8176891 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 27 Jun 2015 15:33:58 +0100 Subject: stats: Add support for quartiles (and thus median) More stuff, quite useful characteristics of a dataset. Signed-off-by: Damien Lespiau --- lib/igt_stats.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/igt_stats.h | 5 ++ lib/tests/igt_stats.c | 41 ++++++++++++++++ 3 files changed, 177 insertions(+) diff --git a/lib/igt_stats.c b/lib/igt_stats.c index 5c1f23ff..66fd5636 100644 --- a/lib/igt_stats.c +++ b/lib/igt_stats.c @@ -23,6 +23,7 @@ */ #include +#include #include #include "igt_core.h" @@ -94,6 +95,7 @@ void igt_stats_init(igt_stats_t *stats, unsigned int capacity) void igt_stats_fini(igt_stats_t *stats) { free(stats->values); + free(stats->sorted); } @@ -158,6 +160,7 @@ void igt_stats_push(igt_stats_t *stats, uint64_t value) stats->values[stats->n_values++] = value; stats->mean_variance_valid = false; + stats->sorted_array_valid = false; if (value < stats->min) stats->min = value; @@ -220,6 +223,134 @@ uint64_t igt_stats_get_range(igt_stats_t *stats) return igt_stats_get_max(stats) - igt_stats_get_min(stats); } +static int cmp_u64(const void *pa, const void *pb) +{ + const uint64_t *a = pa, *b = pb; + + if (*a < *b) + return -1; + if (*a > *b) + return 1; + return 0; +} + +static void igt_stats_ensure_sorted_values(igt_stats_t *stats) +{ + if (stats->sorted_array_valid) + return; + + if (!stats->sorted) { + stats->sorted = calloc(stats->capacity, sizeof(*stats->values)); + igt_assert(stats->sorted); + } + + memcpy(stats->sorted, stats->values, + sizeof(*stats->values) * stats->n_values); + + qsort(stats->sorted, stats->n_values, sizeof(*stats->values), cmp_u64); + + stats->sorted_array_valid = true; +} + +/* + * We use Tukey's hinge for our quartiles determination. + * ends (end, lower_end) are exclusive. + */ +static double +igt_stats_get_median_internal(igt_stats_t *stats, + unsigned int start, unsigned int end, + unsigned int *lower_end /* out */, + unsigned int *upper_start /* out */) +{ + unsigned int mid, n_values = end - start; + double median; + + igt_stats_ensure_sorted_values(stats); + + /* odd number of data points */ + if (n_values % 2 == 1) { + /* median is the value in the middle (actual datum) */ + mid = start + n_values / 2; + median = stats->sorted[mid]; + + /* the two halves contain the median value */ + if (lower_end) + *lower_end = mid + 1; + if (upper_start) + *upper_start = mid; + + /* even number of data points */ + } else { + /* + * The middle is in between two indexes, 'mid' points at the + * lower one. The median is then the average between those two + * values. + */ + mid = start + n_values / 2 - 1; + median = (stats->sorted[mid] + stats->sorted[mid + 1]) / 2.; + + if (lower_end) + *lower_end = mid + 1; + if (upper_start) + *upper_start = mid + 1; + } + + return median; +} + +/** + * igt_stats_get_quartiles: + * @stats: An #igt_stats_t instance + * @q1: (out): lower or 25th quartile + * @q2: (out): median or 50th quartile + * @q3: (out): upper or 75th quartile + * + * Retrieves the [quartiles](https://en.wikipedia.org/wiki/Quartile) of the + * @stats dataset. + */ +void igt_stats_get_quartiles(igt_stats_t *stats, + double *q1, double *q2, double *q3) +{ + unsigned int lower_end, upper_start; + double ret; + + if (stats->n_values < 3) { + if (q1) + *q1 = 0.; + if (q2) + *q2 = 0.; + if (q3) + *q3 = 0.; + return; + } + + ret = igt_stats_get_median_internal(stats, 0, stats->n_values, + &lower_end, &upper_start); + if (q2) + *q2 = ret; + + ret = igt_stats_get_median_internal(stats, 0, lower_end, NULL, NULL); + if (q1) + *q1 = ret; + + ret = igt_stats_get_median_internal(stats, upper_start, stats->n_values, + NULL, NULL); + if (q3) + *q3 = ret; +} + +/** + * igt_stats_get_median: + * @stats: An #igt_stats_t instance + * + * Retrieves the median of the @stats dataset. + */ +double igt_stats_get_median(igt_stats_t *stats) +{ + return igt_stats_get_median_internal(stats, 0, stats->n_values, + NULL, NULL); +} + /* * Algorithm popularised by Knuth in: * diff --git a/lib/igt_stats.h b/lib/igt_stats.h index 46437c48..887fa79c 100644 --- a/lib/igt_stats.h +++ b/lib/igt_stats.h @@ -41,8 +41,10 @@ typedef struct { unsigned int capacity; unsigned int is_population : 1; unsigned int mean_variance_valid : 1; + unsigned int sorted_array_valid : 1; uint64_t min, max; double mean, variance; + uint64_t *sorted; } igt_stats_t; void igt_stats_init(igt_stats_t *stats, unsigned int capacity); @@ -55,7 +57,10 @@ void igt_stats_push_array(igt_stats_t *stats, uint64_t igt_stats_get_min(igt_stats_t *stats); uint64_t igt_stats_get_max(igt_stats_t *stats); uint64_t igt_stats_get_range(igt_stats_t *stats); +void igt_stats_get_quartiles(igt_stats_t *stats, + double *q1, double *q2, double *q3); double igt_stats_get_mean(igt_stats_t *stats); +double igt_stats_get_median(igt_stats_t *stats); double igt_stats_get_variance(igt_stats_t *stats); double igt_stats_get_std_deviation(igt_stats_t *stats); diff --git a/lib/tests/igt_stats.c b/lib/tests/igt_stats.c index 468bba27..c4c67760 100644 --- a/lib/tests/igt_stats.c +++ b/lib/tests/igt_stats.c @@ -25,6 +25,8 @@ #include "igt_core.h" #include "igt_stats.h" +#define ARRAY_SIZE(arr) (sizeof(arr)/sizeof(arr[0])) + static void push_fixture_1(igt_stats_t *stats) { igt_stats_push(stats, 2); @@ -78,6 +80,44 @@ static void test_range(void) igt_assert(igt_stats_get_range(&stats) == 8); } +/* + * Examples taken from: https://en.wikipedia.org/wiki/Quartile + * The values are shifted a bit to test we do indeed start by sorting data + * set. + */ +static void test_quartiles(void) +{ + static const uint64_t s1[] = + { 47, 49, 6, 7, 15, 36, 39, 40, 41, 42, 43 }; + static const uint64_t s2[] = { 40, 41, 7, 15, 36, 39 }; + igt_stats_t stats; + double q1, q2, q3; + + /* s1, odd number of data points */ + igt_stats_init(&stats, ARRAY_SIZE(s1)); + igt_stats_push_array(&stats, s1, ARRAY_SIZE(s1)); + + igt_stats_get_quartiles(&stats, &q1, &q2, &q3); + igt_assert_eq_double(q1, 25.5); + igt_assert_eq_double(q2, 40); + igt_assert_eq_double(q3, 42.5); + igt_assert_eq_double(igt_stats_get_median(&stats), 40); + + igt_stats_fini(&stats); + + /* s1, even number of data points */ + igt_stats_init(&stats, ARRAY_SIZE(s2)); + igt_stats_push_array(&stats, s2, ARRAY_SIZE(s2)); + + igt_stats_get_quartiles(&stats, &q1, &q2, &q3); + igt_assert_eq_double(q1, 15); + igt_assert_eq_double(q2, 37.5); + igt_assert_eq_double(q3, 40); + igt_assert_eq_double(igt_stats_get_median(&stats), 37.5); + + igt_stats_fini(&stats); +} + static void test_mean(void) { igt_stats_t stats; @@ -150,6 +190,7 @@ igt_simple_main test_init(); test_min_max(); test_range(); + test_quartiles(); test_mean(); test_invalidate_mean(); test_std_deviation(); -- cgit v1.2.3