i915/gem_exec_nop: Remove submission batching

Execbuf requests are currently submitted by subtests in batches of 1024 repetitions. That may be too many under some circumstances (e.g., intensive logging output) and subtests may take far more time than expected. The reason behind that batching was the unacceptable microsecond imprecision of gettime when gem_exec_nop was a benchmark rather than a test and time measurement was looking for a precision of ~100 ns. Since that measurement is now mostly informative and not a pass/fail metric, we can be more tolerant and accept the overhead of gettime after each submission. Remove the batching from the body of subtests which don't require submicrosecond precision and measure time after each execbuf request submission (or a group of one submission per engine). Since there is one subtest - "headless" - which still requires more precise time measurement, don't remove the batching from the nop_on_ring() helper but let its users request non-batched submission mode instead. To make this even more flexible, change the semantics of the helper argument used so far for returning the count of submissions completed within the requested time frame and use it also for passing the desired batch size (number of iterations), then update its users to initialize that argument according to their individual requirements. Note that this will impact platforms with a slow hpet, such as Pineview, most. But equally the platforms themselves are slow enough that a multi-millisecond clock granularity is still a minor irritation. Suggested-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
author: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com> 2020-05-08 15:56:31 +0200
committer: Chris Wilson <chris@chris-wilson.co.uk> 2020-05-09 10:13:46 +0100
commit: 16f067ae42a6a93b8f0c5835210e2575a883001b (patch)
tree: 7590ea016b57988f8f0053b56099529332942a76 /tests/i915/gem_exec_nop.c
parent: 6a92555e7c5bc3b05169966fa42086d2dbb0f818 (diff)
1 files changed, 44 insertions, 69 deletions
diff --git a/tests/i915/gem_exec_nop.c b/tests/i915/gem_exec_nop.c
index c17d672c..1c17d023 100644
--- a/tests/i915/gem_exec_nop.c
+++ b/tests/i915/gem_exec_nop.c
@@ -59,9 +59,8 @@
 #define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY
 #define MAX_ENGINES (I915_EXEC_RING_MASK + 1)
 
-#define FORKED 1
-#define CHAINED 2
-#define CONTEXT 4
+#define FORKED (1 << 0)
+#define CONTEXT (1 << 1)
 
 static double elapsed(const struct timespec *start, const struct timespec *end)
 {
@@ -70,7 +69,8 @@ static double elapsed(const struct timespec *start, const struct timespec *end)
 }
 
 static double nop_on_ring(int fd, uint32_t handle,
-			  const struct intel_execution_engine2 *e, int timeout,
+			  const struct intel_execution_engine2 *e,
+			  int timeout,
 			  unsigned long *out)
 {
 	struct drm_i915_gem_execbuffer2 execbuf;
@@ -96,10 +96,9 @@ static double nop_on_ring(int fd, uint32_t handle,
 	count = 0;
 	clock_gettime(CLOCK_MONOTONIC, &start);
 	do {
-		for (int loop = 0; loop < 1024; loop++)
-			gem_execbuf(fd, &execbuf);
+		gem_execbuf(fd, &execbuf);
+		count++;
 
-		count += 1024;
 		clock_gettime(CLOCK_MONOTONIC, &now);
 	} while (elapsed(&start, &now) < timeout);
 	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
@@ -481,9 +480,9 @@ static void parallel(int fd, uint32_t handle, int timeout)
 		count = 0;
 		clock_gettime(CLOCK_MONOTONIC, &start);
 		do {
-			for (int loop = 0; loop < 1024; loop++)
-				gem_execbuf(fd, &execbuf);
-			count += 1024;
+			gem_execbuf(fd, &execbuf);
+			count++;
+
 			clock_gettime(CLOCK_MONOTONIC, &now);
 		} while (elapsed(&start, &now) < timeout);
 		time = elapsed(&start, &now) / count;
@@ -547,9 +546,9 @@ static void independent(int fd, uint32_t handle, int timeout)
 		count = 0;
 		clock_gettime(CLOCK_MONOTONIC, &start);
 		do {
-			for (int loop = 0; loop < 1024; loop++)
-				gem_execbuf(fd, &execbuf);
-			count += 1024;
+			gem_execbuf(fd, &execbuf);
+			count++;
+
 			clock_gettime(CLOCK_MONOTONIC, &now);
 		} while (elapsed(&start, &now) < timeout);
 		time = elapsed(&start, &now) / count;
@@ -604,9 +603,9 @@ static void multiple(int fd,
 		count = 0;
 		clock_gettime(CLOCK_MONOTONIC, &start);
 		do {
-			for (int loop = 0; loop < 1024; loop++)
-				gem_execbuf(i915, &execbuf);
-			count += 1024;
+			gem_execbuf(i915, &execbuf);
+			count++;
+
 			clock_gettime(CLOCK_MONOTONIC, &now);
 		} while (elapsed(&start, &now) < timeout);
 		time = elapsed(&start, &now) / count;
@@ -664,14 +663,12 @@ static void series(int fd, uint32_t handle, int timeout)
 	count = 0;
 	clock_gettime(CLOCK_MONOTONIC, &start);
 	do {
-		for (int loop = 0; loop < 1024; loop++) {
-			for (int n = 0; n < nengine; n++) {
-				execbuf.flags &= ~ENGINE_FLAGS;
-				execbuf.flags |= engines[n];
-				gem_execbuf(fd, &execbuf);
-			}
+		for (int n = 0; n < nengine; n++) {
+			execbuf.flags &= ~ENGINE_FLAGS;
+			execbuf.flags |= engines[n];
+			gem_execbuf(fd, &execbuf);
 		}
-		count += nengine * 1024;
+		count += nengine;
 		clock_gettime(CLOCK_MONOTONIC, &now);
 	} while (elapsed(&start, &now) < timeout); /* Hang detection ~120s */
 	gem_sync(fd, handle);
@@ -767,25 +764,14 @@ static void sequential(int fd, uint32_t handle, unsigned flags, int timeout)
 		clock_gettime(CLOCK_MONOTONIC, &start);
 		do {
 			igt_permute_array(engines, nengine, xchg);
-			if (flags & CHAINED) {
-				for (n = 0; n < nengine; n++) {
-					execbuf.flags &= ~ENGINE_FLAGS;
-					execbuf.flags |= engines[n];
-					for (int loop = 0; loop < 1024; loop++)
-						gem_execbuf(fd, &execbuf);
-				}
-			} else {
-				for (int loop = 0; loop < 1024; loop++) {
-					for (n = 0; n < nengine; n++) {
-						execbuf.flags &= ~ENGINE_FLAGS;
-						execbuf.flags |= engines[n];
-						gem_execbuf(fd, &execbuf);
-					}
-				}
+			for (n = 0; n < nengine; n++) {
+				execbuf.flags &= ~ENGINE_FLAGS;
+				execbuf.flags |= engines[n];
+				gem_execbuf(fd, &execbuf);
 			}
-			count += 1024;
+			count++;
 			clock_gettime(CLOCK_MONOTONIC, &now);
-		} while (elapsed(&start, &now) < timeout); /* Hang detection ~120s */
+		} while (elapsed(&start, &now) < timeout);
 
 		gem_sync(fd, obj[0].handle);
 		clock_gettime(CLOCK_MONOTONIC, &now);
@@ -869,26 +855,24 @@ static void fence_signal(int fd, uint32_t handle,
 	intel_detect_and_clear_missed_interrupts(fd);
 	clock_gettime(CLOCK_MONOTONIC, &start);
 	do {
-		for (int loop = 0; loop < 1024; loop++) {
-			for (int e = 0; e < nengine; e++) {
-				if (fences[n] != -1) {
-					igt_assert(fence_wait(fences[n]));
-					close(fences[n]);
-				}
+		for (int e = 0; e < nengine; e++) {
+			if (fences[n] != -1) {
+				igt_assert(fence_wait(fences[n]));
+				close(fences[n]);
+			}
 
-				execbuf.flags &= ~ENGINE_FLAGS;
-				execbuf.flags |= engines[e];
-				gem_execbuf_wr(fd, &execbuf);
+			execbuf.flags &= ~ENGINE_FLAGS;
+			execbuf.flags |= engines[e];
+			gem_execbuf_wr(fd, &execbuf);
 
-				/* Enable signaling by doing a poll() */
-				fences[n] = execbuf.rsvd2 >> 32;
-				signal += fence_enable_signaling(fences[n]);
+			/* Enable signaling by doing a poll() */
+			fences[n] = execbuf.rsvd2 >> 32;
+			signal += fence_enable_signaling(fences[n]);
 
-				n = (n + 1) % NFENCES;
-			}
+			n = (n + 1) % NFENCES;
 		}
 
-		count += 1024 * nengine;
+		count += nengine;
 		clock_gettime(CLOCK_MONOTONIC, &now);
 	} while (elapsed(&start, &now) < timeout);
 	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
@@ -910,6 +894,7 @@ static void preempt(int fd, uint32_t handle,
 	struct timespec start, now;
 	unsigned long count;
 	uint32_t ctx[2];
+	igt_spin_t *spin;
 
 	ctx[0] = gem_context_clone_with_engines(fd, 0);
 	gem_context_set_priority(fd, ctx[0], MIN_PRIO);
@@ -934,21 +919,14 @@ static void preempt(int fd, uint32_t handle,
 	intel_detect_and_clear_missed_interrupts(fd);
 
 	count = 0;
+	spin = __igt_spin_new(fd, .ctx = ctx[0], .engine = e->flags);
 	clock_gettime(CLOCK_MONOTONIC, &start);
 	do {
-		igt_spin_t *spin =
-			__igt_spin_new(fd,
-				       .ctx = ctx[0],
-				       .engine = e->flags);
-
-		for (int loop = 0; loop < 1024; loop++)
-			gem_execbuf(fd, &execbuf);
-
-		igt_spin_free(fd, spin);
-
-		count += 1024;
+		gem_execbuf(fd, &execbuf);
+		count++;
 		clock_gettime(CLOCK_MONOTONIC, &now);
 	} while (elapsed(&start, &now) < 20);
+	igt_spin_free(fd, spin);
 	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
 
 	gem_context_destroy(fd, ctx[1]);
@@ -1028,9 +1006,6 @@ igt_main
 	igt_subtest("forked-sequential")
 		sequential(device, handle, FORKED, 20);
 
-	igt_subtest("chained-sequential")
-		sequential(device, handle, FORKED | CHAINED, 20);
-
 	igt_subtest("context-sequential")
 		sequential(device, handle, FORKED | CONTEXT, 20);
author	Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>	2020-05-08 15:56:31 +0200
committer	Chris Wilson <chris@chris-wilson.co.uk>	2020-05-09 10:13:46 +0100
commit	16f067ae42a6a93b8f0c5835210e2575a883001b (patch)
tree	7590ea016b57988f8f0053b56099529332942a76 /tests/i915/gem_exec_nop.c
parent	6a92555e7c5bc3b05169966fa42086d2dbb0f818 (diff)