benchmarks/gem_latency: Measure fence wakeup latencies

Useful for comparing the cost of explicit fences versus implicit. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
author: Chris Wilson <chris@chris-wilson.co.uk> 2016-09-02 17:25:55 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2016-09-02 17:27:55 +0100
commit: f92e47f4014ac5e9b115bf43ad40fde5182ad636 (patch)
tree: e5ed4ae1afe810b1c5160c6f47364ef35631790b
parent: a28e9e38a9efc6daf5a08d60d29adcd3e328fe6f (diff)
2 files changed, 63 insertions, 7 deletions
diff --git a/benchmarks/ezbench.d/gem_latency.test b/benchmarks/ezbench.d/gem_latency.test
index a62e1026..22d91b2e 100644
--- a/benchmarks/ezbench.d/gem_latency.test
+++ b/benchmarks/ezbench.d/gem_latency.test
@@ -33,47 +33,63 @@ done
 # Measure the wakeup delay (each wakeup should be uncontended)
 __gem:latency:wait__() {
     for (( c=0; c<$1; c++ )); do
-        sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 2 -p $2 -f 2
+        sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 2 -p $2 -f 2 $3
     done
 }
 for p in 1 2 4 8 16 32 64 128 256 512 1024; do
 	name="gem:latency:wait:$p"
 	test_name="$test_name $name"
 	eval "${name}_run() { __gem:latency:wait__ \$1 $p ; } "
+
+	name="gem:latency:wait:$p:fence"
+	test_name="$test_name $name"
+	eval "${name}_run() { __gem:latency:wait__ \$1 $p -F ; } "
 done
 
 # Measure the total CPU cycles for the uncontended wakeups
 __gem:latency:cpu__() {
     for (( c=0; c<$1; c++ )); do
-        sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 2 -p $2 -f 4
+        sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 2 -p $2 -f 4 $3
     done
 }
 for p in 1 2 4 8 16 32 64 128 512 1024; do
 	name="gem:latency:cpu:$p"
 	test_name="$test_name $name"
 	eval "${name}_run() { __gem:latency:cpu__ \$1 $p ; } "
+
+	name="gem:latency:cpu:$p:fence"
+	test_name="$test_name $name"
+	eval "${name}_run() { __gem:latency:cpu__ \$1 $p -F ; } "
 done
 
 # Measure the wakeup delay for contended wakeups (multiple waiters per request)
 __gem:latency:herd__() {
     for (( c=0; c<$1; c++ )); do
-        sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 1 -c $2 -f 2
+        sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 1 -c $2 -f 2 $3
     done
 }
 for p in 0 1 2 4 8 16 32 64 128 512 1024; do
 	name="gem:latency:herd:$p"
 	test_name="$test_name $name"
 	eval "${name}_run() { __gem:latency:herd__ \$1 $p ; } "
+
+	name="gem:latency:herd:$p:fence"
+	test_name="$test_name $name"
+	eval "${name}_run() { __gem:latency:herd__ \$1 $p -F ; } "
 done
 
 # Measure the impact of contended wakeups on the RealTime waiter
 __gem:latency:realtime__() {
     for (( c=0; c<$1; c++ )); do
-        sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 1 -c $2 -R -f 3
+        sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 1 -c $2 -R -f 3 $3
     done
 }
 for p in 0 1 2 4 8 16 32 64 128 512 1024; do
 	name="gem:latency:realtime:$p"
 	test_name="$test_name $name"
 	eval "${name}_run() { __gem:latency:realtime__ \$1 $p ; } "
+
+	name="gem:latency:realtime:$p:fence"
+	test_name="$test_name $name"
+	eval "${name}_run() { __gem:latency:realtime__ \$1 $p -F ; } "
 done
diff --git a/benchmarks/gem_latency.c b/benchmarks/gem_latency.c
index 4b36d5ae..154c5f14 100644
--- a/benchmarks/gem_latency.c
+++ b/benchmarks/gem_latency.c
@@ -41,12 +41,17 @@
 #include <sys/stat.h>
 #include <sys/ioctl.h>
 #include <sys/time.h>
+#include <sys/poll.h>
 #include <sys/resource.h>
 #include "drm.h"
 
+#define I915_EXEC_FENCE_OUT (1 << 17)
+#define LOCAL_IOCTL_I915_GEM_EXECBUFFER2_WR       DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+
 #define CONTEXT		0x1
 #define REALTIME	0x2
 #define CMDPARSER	0x4
+#define FENCE_OUT	0x8
 
 static int done;
 static int fd;
@@ -96,6 +101,20 @@ inline static uint32_t read_timestamp(void)
 }
 #endif
 
+static int __gem_execbuf_wr(int _fd, struct drm_i915_gem_execbuffer2 *execbuf)
+{
+	int err = 0;
+	if (igt_ioctl(_fd, LOCAL_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf))
+		err = -errno;
+	errno = 0;
+	return err;
+}
+
+static void gem_execbuf_wr(int _fd, struct drm_i915_gem_execbuffer2 *execbuf)
+{
+	igt_assert_eq(__gem_execbuf_wr(_fd, execbuf), 0);
+}
+
 struct consumer {
 	pthread_t thread;
 
@@ -268,6 +287,8 @@ static void setup_latency(struct producer *p, int gen, unsigned flags)
 	if (flags & CMDPARSER)
 		eb->batch_len = sizeof(*map) * ((i + 1) & ~1);
 	eb->flags = I915_EXEC_BLT | LOCAL_EXEC_NO_RELOC;
+	if (flags & FENCE_OUT)
+		eb->flags |= I915_EXEC_FENCE_OUT;
 	eb->rsvd1 = p->ctx;
 }
 
@@ -297,9 +318,18 @@ static void setup_nop(struct producer *p, uint32_t batch, unsigned flags)
 	eb->rsvd1 = p->ctx;
 }
 
+static void fence_wait(int fence)
+{
+	struct pollfd pfd = { .fd = fence, .events = POLLIN };
+	poll(&pfd, 1, -1);
+}
+
 static void measure_latency(struct producer *p, struct igt_mean *mean)
 {
-	gem_sync(fd, p->latency_dispatch.exec[0].handle);
+	if (!(p->latency_dispatch.execbuf.flags & I915_EXEC_FENCE_OUT))
+		gem_sync(fd, p->latency_dispatch.exec[0].handle);
+	else
+		fence_wait(p->latency_dispatch.execbuf.rsvd2 >> 32);
 	igt_mean_add(mean, read_timestamp() - *p->last_timestamp);
 }
 
@@ -332,7 +362,10 @@ static void *producer(void *arg)
 		/* Finally, execute a batch that just reads the current
 		 * TIMESTAMP so we can measure the latency.
 		 */
-		gem_execbuf(fd, &p->latency_dispatch.execbuf);
+		if (p->latency_dispatch.execbuf.flags & I915_EXEC_FENCE_OUT)
+			gem_execbuf_wr(fd, &p->latency_dispatch.execbuf);
+		else
+			gem_execbuf(fd, &p->latency_dispatch.execbuf);
 
 		/* Wake all the associated clients to wait upon our batch */
 		p->wait = p->nconsumers;
@@ -354,6 +387,9 @@ static void *producer(void *arg)
 		pthread_mutex_unlock(&p->lock);
 
 		p->complete++;
+
+		if (p->latency_dispatch.execbuf.flags & I915_EXEC_FENCE_OUT)
+			close(p->latency_dispatch.execbuf.rsvd2 >> 32);
 	}
 
 	pthread_mutex_lock(&p->lock);
@@ -566,7 +602,7 @@ int main(int argc, char **argv)
 	unsigned flags = 0;
 	int c;
 
-	while ((c = getopt(argc, argv, "Cp:c:n:w:t:f:sR")) != -1) {
+	while ((c = getopt(argc, argv, "Cp:c:n:w:t:f:sRF")) != -1) {
 		switch (c) {
 		case 'p':
 			/* How many threads generate work? */
@@ -630,6 +666,10 @@ int main(int argc, char **argv)
 			flags |= CMDPARSER;
 			break;
 
+		case 'F':
+			flags |= FENCE_OUT;
+			break;
+
 		default:
 			break;
 		}
author	Chris Wilson <chris@chris-wilson.co.uk>	2016-09-02 17:25:55 +0100
committer	Chris Wilson <chris@chris-wilson.co.uk>	2016-09-02 17:27:55 +0100
commit	f92e47f4014ac5e9b115bf43ad40fde5182ad636 (patch)
tree	e5ed4ae1afe810b1c5160c6f47364ef35631790b
parent	a28e9e38a9efc6daf5a08d60d29adcd3e328fe6f (diff)