From 3a5cf84317197cdac88196cda76c6a7e08943f20 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 26 Jun 2015 17:02:09 +0100 Subject: stats: Add a way to specify if the data set is a population or a sample This changes how we compute the variance. We want an unbiased variance when reasoning about a sample. Signed-off-by: Damien Lespiau --- lib/igt_stats.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'lib/igt_stats.c') diff --git a/lib/igt_stats.c b/lib/igt_stats.c index 0bee1386..c7d5fbd2 100644 --- a/lib/igt_stats.c +++ b/lib/igt_stats.c @@ -91,6 +91,36 @@ void igt_stats_fini(igt_stats_t *stats) free(stats->values); } +/** + * igt_stats_set_population: + * @stats: An #igt_stats_t instance + * @full_population: Whether we're dealing with sample data or a full + * population + * + * In statistics, we usually deal with a subset of the full data (which may be + * a continuous or infinite set). Data analysis is then done on a sample of + * this population. + * + * This has some importance as only having a sample of the data leads to + * [biased estimators](https://en.wikipedia.org/wiki/Bias_of_an_estimator). We + * currently use the information given by this method to apply + * [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction) + * to the variance. + * + * When giving #true to this function, the data set in @stats is considered a + * full population. It's considered a sample of a bigger population otherwise. + * + * When newly created, @stats defaults to holding sample data. 
+ */ +void igt_stats_set_population(igt_stats_t *stats, bool full_population) +{ + if (full_population == stats->is_population) + return; + + stats->is_population = full_population; + stats->mean_variance_valid = false; +} + /** * igt_stats_push: * @stats: An #igt_stats_t instance @@ -129,7 +159,10 @@ static void igt_stats_knuth_mean_variance(igt_stats_t *stats) } stats->mean = mean; - stats->variance = m2 / stats->n_values; + if (stats->n_values > 1 && !stats->is_population) + stats->variance = m2 / (stats->n_values - 1); + else + stats->variance = m2 / stats->n_values; stats->mean_variance_valid = true; } -- cgit v1.2.3