summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2016-09-02 17:25:55 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2016-09-02 17:27:55 +0100
commit: f92e47f4014ac5e9b115bf43ad40fde5182ad636 (patch)
tree: e5ed4ae1afe810b1c5160c6f47364ef35631790b
parent: a28e9e38a9efc6daf5a08d60d29adcd3e328fe6f (diff)
benchmarks/gem_latency: Measure fence wakeup latencies
Useful for comparing the cost of explicit fences versus implicit.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- benchmarks/ezbench.d/gem_latency.test 24
-rw-r--r-- benchmarks/gem_latency.c 46
2 files changed, 63 insertions, 7 deletions
diff --git a/benchmarks/ezbench.d/gem_latency.test b/benchmarks/ezbench.d/gem_latency.test
index a62e1026..22d91b2e 100644
--- a/benchmarks/ezbench.d/gem_latency.test
+++ b/benchmarks/ezbench.d/gem_latency.test
@@ -33,47 +33,63 @@ done
# Measure the wakeup delay (each wakeup should be uncontended)
__gem:latency:wait__() {
for (( c=0; c<$1; c++ )); do
- sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 2 -p $2 -f 2
+ sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 2 -p $2 -f 2 $3
done
}
for p in 1 2 4 8 16 32 64 128 256 512 1024; do
name="gem:latency:wait:$p"
test_name="$test_name $name"
eval "${name}_run() { __gem:latency:wait__ \$1 $p ; } "
+
+ name="gem:latency:wait:$p:fence"
+ test_name="$test_name $name"
+ eval "${name}_run() { __gem:latency:wait__ \$1 $p -F ; } "
done
# Measure the total CPU cycles for the uncontended wakeups
__gem:latency:cpu__() {
for (( c=0; c<$1; c++ )); do
- sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 2 -p $2 -f 4
+ sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 2 -p $2 -f 4 $3
done
}
for p in 1 2 4 8 16 32 64 128 512 1024; do
name="gem:latency:cpu:$p"
test_name="$test_name $name"
eval "${name}_run() { __gem:latency:cpu__ \$1 $p ; } "
+
+ name="gem:latency:cpu:$p:fence"
+ test_name="$test_name $name"
+ eval "${name}_run() { __gem:latency:cpu__ \$1 $p -F ; } "
done
# Measure the wakeup delay for contended wakeups (multiple waiters per request)
__gem:latency:herd__() {
for (( c=0; c<$1; c++ )); do
- sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 1 -c $2 -f 2
+ sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 1 -c $2 -f 2 $3
done
}
for p in 0 1 2 4 8 16 32 64 128 512 1024; do
name="gem:latency:herd:$p"
test_name="$test_name $name"
eval "${name}_run() { __gem:latency:herd__ \$1 $p ; } "
+
+ name="gem:latency:herd:$p:fence"
+ test_name="$test_name $name"
+ eval "${name}_run() { __gem:latency:herd__ \$1 $p -F ; } "
done
# Measure the impact of contended wakeups on the RealTime waiter
__gem:latency:realtime__() {
for (( c=0; c<$1; c++ )); do
- sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 1 -c $2 -R -f 3
+ sudo $IGT_BENCHMARKS/gem_latency -t 2 -w 1 -c $2 -R -f 3 $3
done
}
for p in 0 1 2 4 8 16 32 64 128 512 1024; do
name="gem:latency:realtime:$p"
test_name="$test_name $name"
eval "${name}_run() { __gem:latency:realtime__ \$1 $p ; } "
+
+ name="gem:latency:realtime:$p:fence"
+ test_name="$test_name $name"
+ eval "${name}_run() { __gem:latency:realtime__ \$1 $p -F ; } "
done
diff --git a/benchmarks/gem_latency.c b/benchmarks/gem_latency.c
index 4b36d5ae..154c5f14 100644
--- a/benchmarks/gem_latency.c
+++ b/benchmarks/gem_latency.c
@@ -41,12 +41,17 @@
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/time.h>
+#include <sys/poll.h>
#include <sys/resource.h>
#include "drm.h"
+#define I915_EXEC_FENCE_OUT (1 << 17)
+#define LOCAL_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+
#define CONTEXT 0x1
#define REALTIME 0x2
#define CMDPARSER 0x4
+#define FENCE_OUT 0x8
static int done;
static int fd;
@@ -96,6 +101,20 @@ inline static uint32_t read_timestamp(void)
}
#endif
+static int __gem_execbuf_wr(int _fd, struct drm_i915_gem_execbuffer2 *execbuf)
+{
+ int err = 0;
+ if (igt_ioctl(_fd, LOCAL_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf))
+ err = -errno;
+ errno = 0;
+ return err;
+}
+
+static void gem_execbuf_wr(int _fd, struct drm_i915_gem_execbuffer2 *execbuf)
+{
+ igt_assert_eq(__gem_execbuf_wr(_fd, execbuf), 0);
+}
+
struct consumer {
pthread_t thread;
@@ -268,6 +287,8 @@ static void setup_latency(struct producer *p, int gen, unsigned flags)
if (flags & CMDPARSER)
eb->batch_len = sizeof(*map) * ((i + 1) & ~1);
eb->flags = I915_EXEC_BLT | LOCAL_EXEC_NO_RELOC;
+ if (flags & FENCE_OUT)
+ eb->flags |= I915_EXEC_FENCE_OUT;
eb->rsvd1 = p->ctx;
}
@@ -297,9 +318,18 @@ static void setup_nop(struct producer *p, uint32_t batch, unsigned flags)
eb->rsvd1 = p->ctx;
}
+static void fence_wait(int fence)
+{
+ struct pollfd pfd = { .fd = fence, .events = POLLIN };
+ poll(&pfd, 1, -1);
+}
+
static void measure_latency(struct producer *p, struct igt_mean *mean)
{
- gem_sync(fd, p->latency_dispatch.exec[0].handle);
+ if (!(p->latency_dispatch.execbuf.flags & I915_EXEC_FENCE_OUT))
+ gem_sync(fd, p->latency_dispatch.exec[0].handle);
+ else
+ fence_wait(p->latency_dispatch.execbuf.rsvd2 >> 32);
igt_mean_add(mean, read_timestamp() - *p->last_timestamp);
}
@@ -332,7 +362,10 @@ static void *producer(void *arg)
/* Finally, execute a batch that just reads the current
* TIMESTAMP so we can measure the latency.
*/
- gem_execbuf(fd, &p->latency_dispatch.execbuf);
+ if (p->latency_dispatch.execbuf.flags & I915_EXEC_FENCE_OUT)
+ gem_execbuf_wr(fd, &p->latency_dispatch.execbuf);
+ else
+ gem_execbuf(fd, &p->latency_dispatch.execbuf);
/* Wake all the associated clients to wait upon our batch */
p->wait = p->nconsumers;
@@ -354,6 +387,9 @@ static void *producer(void *arg)
pthread_mutex_unlock(&p->lock);
p->complete++;
+
+ if (p->latency_dispatch.execbuf.flags & I915_EXEC_FENCE_OUT)
+ close(p->latency_dispatch.execbuf.rsvd2 >> 32);
}
pthread_mutex_lock(&p->lock);
@@ -566,7 +602,7 @@ int main(int argc, char **argv)
unsigned flags = 0;
int c;
- while ((c = getopt(argc, argv, "Cp:c:n:w:t:f:sR")) != -1) {
+ while ((c = getopt(argc, argv, "Cp:c:n:w:t:f:sRF")) != -1) {
switch (c) {
case 'p':
/* How many threads generate work? */
@@ -630,6 +666,10 @@ int main(int argc, char **argv)
flags |= CMDPARSER;
break;
+ case 'F':
+ flags |= FENCE_OUT;
+ break;
+
default:
break;
}