From 42cd5d4c3734ad4ef7fcc9ea9e64ca8f3f17c57c Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Fri, 2 Feb 2018 18:37:47 +0000
Subject: tests/perf_pmu: More busy measurement tightening

Where we use measured sleeps, take PMU samples immediately before and
after the sleep and use their delta as the measured value, in order to
minimize the effect of any test setup delays.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/perf_pmu.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

(limited to 'tests/perf_pmu.c')

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index bf16e5e8..bdf452c8 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -145,8 +145,9 @@ single(int gem_fd, const struct intel_execution_engine2 *e, bool busy)
 	else
 		spin = NULL;
 
-	slept = measured_usleep(batch_duration_ns / 1000);
 	val = pmu_read_single(fd);
+	slept = measured_usleep(batch_duration_ns / 1000);
+	val = pmu_read_single(fd) - val;
 
 	igt_spin_batch_end(spin);
 	igt_spin_batch_free(gem_fd, spin);
@@ -180,8 +181,9 @@ busy_start(int gem_fd, const struct intel_execution_engine2 *e)
 
 	fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
 
-	slept = measured_usleep(batch_duration_ns / 1000);
 	val = pmu_read_single(fd);
+	slept = measured_usleep(batch_duration_ns / 1000);
+	val = pmu_read_single(fd) - val;
 
 	igt_spin_batch_free(gem_fd, spin);
 	close(fd);
@@ -227,8 +229,9 @@ busy_double_start(int gem_fd, const struct intel_execution_engine2 *e)
 	 */
 	fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
 
-	slept = measured_usleep(batch_duration_ns / 1000);
 	val = pmu_read_single(fd);
+	slept = measured_usleep(batch_duration_ns / 1000);
+	val = pmu_read_single(fd) - val;
 
 	igt_spin_batch_end(spin[0]);
 	igt_spin_batch_end(spin[1]);
@@ -279,6 +282,7 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
 	       const unsigned int num_engines)
 {
 	const struct intel_execution_engine2 *e_;
+	uint64_t tval[2][num_engines];
 	uint64_t val[num_engines];
 	int fd[num_engines];
 	unsigned long slept;
@@ -301,13 +305,17 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
 	igt_assert_eq(i, num_engines);
 
 	spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+	pmu_read_multi(fd[0], num_engines, tval[0]);
 	slept = measured_usleep(batch_duration_ns / 1000);
-	pmu_read_multi(fd[0], num_engines, val);
+	pmu_read_multi(fd[0], num_engines, tval[1]);
 
 	igt_spin_batch_end(spin);
 	igt_spin_batch_free(gem_fd, spin);
 	close(fd[0]);
 
+	for (i = 0; i < num_engines; i++)
+		val[i] = tval[1][i] - tval[0][i];
+
 	log_busy(num_engines, val);
 
 	assert_within_epsilon(val[busy_idx], slept, tolerance);
@@ -324,6 +332,7 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
 		    const unsigned int num_engines)
 {
 	const struct intel_execution_engine2 *e_;
+	uint64_t tval[2][num_engines];
 	uint64_t val[num_engines];
 	int fd[num_engines];
 	unsigned long slept;
@@ -362,13 +371,17 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
 	for (i = 0; i < num_engines; i++)
 		fd[i] = open_group(val[i], fd[0]);
 
+	pmu_read_multi(fd[0], num_engines, tval[0]);
 	slept = measured_usleep(batch_duration_ns / 1000);
-	pmu_read_multi(fd[0], num_engines, val);
+	pmu_read_multi(fd[0], num_engines, tval[1]);
 
 	igt_spin_batch_end(spin);
 	igt_spin_batch_free(gem_fd, spin);
 	close(fd[0]);
 
+	for (i = 0; i < num_engines; i++)
+		val[i] = tval[1][i] - tval[0][i];
+
 	log_busy(num_engines, val);
 
 	for (i = 0; i < num_engines; i++) {
@@ -384,6 +397,7 @@ static void
 all_busy_check_all(int gem_fd, const unsigned int num_engines)
 {
 	const struct intel_execution_engine2 *e;
+	uint64_t tval[2][num_engines];
 	uint64_t val[num_engines];
 	int fd[num_engines];
 	unsigned long slept;
@@ -418,13 +432,17 @@ all_busy_check_all(int gem_fd, const unsigned int num_engines)
 	for (i = 0; i < num_engines; i++)
 		fd[i] = open_group(val[i], fd[0]);
 
+	pmu_read_multi(fd[0], num_engines, tval[0]);
 	slept = measured_usleep(batch_duration_ns / 1000);
-	pmu_read_multi(fd[0], num_engines, val);
+	pmu_read_multi(fd[0], num_engines, tval[1]);
 
 	igt_spin_batch_end(spin);
 	igt_spin_batch_free(gem_fd, spin);
 	close(fd[0]);
 
+	for (i = 0; i < num_engines; i++)
+		val[i] = tval[1][i] - tval[0][i];
+
 	log_busy(num_engines, val);
 
 	for (i = 0; i < num_engines; i++)
@@ -765,13 +783,15 @@ multi_client(int gem_fd, const struct intel_execution_engine2 *e)
 	spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
 	igt_spin_batch_set_timeout(spin, 2 * batch_duration_ns);
 
-	slept = measured_usleep(batch_duration_ns / 1000);
+	val[0] = pmu_read_single(fd[0]);
 	val[1] = pmu_read_single(fd[1]);
+	slept = measured_usleep(batch_duration_ns / 1000);
+	val[1] = pmu_read_single(fd[1]) - val[1];
 	close(fd[1]);
 
 	gem_sync(gem_fd, spin->handle);
 
-	val[0] = pmu_read_single(fd[0]);
+	val[0] = pmu_read_single(fd[0]) - val[0];
 
 	igt_spin_batch_free(gem_fd, spin);
 	close(fd[0]);
-- 
cgit v1.2.3