From f92e47f4014ac5e9b115bf43ad40fde5182ad636 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 2 Sep 2016 17:25:55 +0100 Subject: benchmarks/gem_latency: Measure fence wakeup latencies Useful for comparing the cost of explicit fences versus implicit. Signed-off-by: Chris Wilson --- benchmarks/ezbench.d/gem_latency.test | 24 +++++++++++++++--- benchmarks/gem_latency.c | 46 ++++++++++++++++++++++++++++++++--- 2 files changed, 63 insertions(+), 7 deletions(-) diff --git a/benchmarks/ezbench.d/gem_latency.test b/benchmarks/ezbench.d/gem_latency.test index a62e1026..22d91b2e 100644 --- a/benchmarks/ezbench.d/gem_latency.test +++ b/benchmarks/ezbench.d/gem_latency.test @@ -33,47 +33,63 @@ done # Measure the wakeup delay (each wakeup should be uncontended) __gem:latency:wait__() { for (( c=0; c<$1; c++ )); do - sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 2 -p $2 -f 2 + sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 2 -p $2 -f 2 $3 done } for p in 1 2 4 8 16 32 64 128 256 512 1024; do name="gem:latency:wait:$p" test_name="$test_name $name" eval "${name}_run() { __gem:latency:wait__ \$1 $p ; } " + + name="gem:latency:wait:$p:fence" + test_name="$test_name $name" + eval "${name}_run() { __gem:latency:wait__ \$1 $p -F ; } " done # Measure the total CPU cycles for the uncontended wakeups __gem:latency:cpu__() { for (( c=0; c<$1; c++ )); do - sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 2 -p $2 -f 4 + sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 2 -p $2 -f 4 $3 done } for p in 1 2 4 8 16 32 64 128 512 1024; do name="gem:latency:cpu:$p" test_name="$test_name $name" eval "${name}_run() { __gem:latency:cpu__ \$1 $p ; } " + + name="gem:latency:cpu:$p:fence" + test_name="$test_name $name" + eval "${name}_run() { __gem:latency:cpu__ \$1 $p -F ; } " done # Measure the wakeup delay for contended wakeups (multiple waiters per request) __gem:latency:herd__() { for (( c=0; c<$1; c++ )); do - sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 1 -c $2 -f 2 + sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 1 -c $2 -f 2 
$3 done } for p in 0 1 2 4 8 16 32 64 128 512 1024; do name="gem:latency:herd:$p" test_name="$test_name $name" eval "${name}_run() { __gem:latency:herd__ \$1 $p ; } " + + name="gem:latency:herd:$p:fence" + test_name="$test_name $name" + eval "${name}_run() { __gem:latency:herd__ \$1 $p -F ; } " done # Measure the impact of contended wakeups on the RealTime waiter __gem:latency:realtime__() { for (( c=0; c<$1; c++ )); do - sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 1 -c $2 -R -f 3 + sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 1 -c $2 -R -f 3 $3 done } for p in 0 1 2 4 8 16 32 64 128 512 1024; do name="gem:latency:realtime:$p" test_name="$test_name $name" eval "${name}_run() { __gem:latency:realtime__ \$1 $p ; } " + + name="gem:latency:realtime:$p:fence" + test_name="$test_name $name" + eval "${name}_run() { __gem:latency:realtime__ \$1 $p -F ; } " done diff --git a/benchmarks/gem_latency.c b/benchmarks/gem_latency.c index 4b36d5ae..154c5f14 100644 --- a/benchmarks/gem_latency.c +++ b/benchmarks/gem_latency.c @@ -41,12 +41,17 @@ #include #include #include +#include #include #include "drm.h" +#define I915_EXEC_FENCE_OUT (1 << 17) +#define LOCAL_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) + #define CONTEXT 0x1 #define REALTIME 0x2 #define CMDPARSER 0x4 +#define FENCE_OUT 0x8 static int done; static int fd; @@ -96,6 +101,20 @@ inline static uint32_t read_timestamp(void) } #endif +static int __gem_execbuf_wr(int _fd, struct drm_i915_gem_execbuffer2 *execbuf) +{ + int err = 0; + if (igt_ioctl(_fd, LOCAL_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf)) + err = -errno; + errno = 0; + return err; +} + +static void gem_execbuf_wr(int _fd, struct drm_i915_gem_execbuffer2 *execbuf) +{ + igt_assert_eq(__gem_execbuf_wr(_fd, execbuf), 0); +} + struct consumer { pthread_t thread; @@ -268,6 +287,8 @@ static void setup_latency(struct producer *p, int gen, unsigned flags) if (flags & CMDPARSER) eb->batch_len = sizeof(*map) * 
((i + 1) & ~1); eb->flags = I915_EXEC_BLT | LOCAL_EXEC_NO_RELOC; + if (flags & FENCE_OUT) + eb->flags |= I915_EXEC_FENCE_OUT; eb->rsvd1 = p->ctx; } @@ -297,9 +318,18 @@ static void setup_nop(struct producer *p, uint32_t batch, unsigned flags) eb->rsvd1 = p->ctx; } +static void fence_wait(int fence) +{ + struct pollfd pfd = { .fd = fence, .events = POLLIN }; + poll(&pfd, 1, -1); +} + static void measure_latency(struct producer *p, struct igt_mean *mean) { - gem_sync(fd, p->latency_dispatch.exec[0].handle); + if (!(p->latency_dispatch.execbuf.flags & I915_EXEC_FENCE_OUT)) + gem_sync(fd, p->latency_dispatch.exec[0].handle); + else + fence_wait(p->latency_dispatch.execbuf.rsvd2 >> 32); igt_mean_add(mean, read_timestamp() - *p->last_timestamp); } @@ -332,7 +362,10 @@ static void *producer(void *arg) /* Finally, execute a batch that just reads the current * TIMESTAMP so we can measure the latency. */ - gem_execbuf(fd, &p->latency_dispatch.execbuf); + if (p->latency_dispatch.execbuf.flags & I915_EXEC_FENCE_OUT) + gem_execbuf_wr(fd, &p->latency_dispatch.execbuf); + else + gem_execbuf(fd, &p->latency_dispatch.execbuf); /* Wake all the associated clients to wait upon our batch */ p->wait = p->nconsumers; @@ -354,6 +387,9 @@ static void *producer(void *arg) pthread_mutex_unlock(&p->lock); p->complete++; + + if (p->latency_dispatch.execbuf.flags & I915_EXEC_FENCE_OUT) + close(p->latency_dispatch.execbuf.rsvd2 >> 32); } pthread_mutex_lock(&p->lock); @@ -566,7 +602,7 @@ int main(int argc, char **argv) unsigned flags = 0; int c; - while ((c = getopt(argc, argv, "Cp:c:n:w:t:f:sR")) != -1) { + while ((c = getopt(argc, argv, "Cp:c:n:w:t:f:sRF")) != -1) { switch (c) { case 'p': /* How many threads generate work? */ @@ -630,6 +666,10 @@ int main(int argc, char **argv) flags |= CMDPARSER; break; + case 'F': + flags |= FENCE_OUT; + break; + default: break; } -- cgit v1.2.3