From 99c015af48f55b63df26e3c45bbff4d3c1fcb54b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 May 2016 15:20:38 +0100 Subject: benchmarks: Add gem_exec_fault If we specify an unobtainable alignment (e.g., 63 bits) the kernel will evict the object from the GTT and fail to rebind it. We can use this to measure how long it takes to move objects around in the GTT by running execbuf followed by the unbind. For small objects, this will be dominated by the nop execution time, but for larger objects this will be ratelimited by how fast we can rewrite the PTE. Signed-off-by: Chris Wilson --- benchmarks/Makefile.sources | 1 + benchmarks/ezbench.d/gem_exec_fault.test | 22 ++++ benchmarks/gem_exec_fault.c | 198 +++++++++++++++++++++++++++++++ 3 files changed, 221 insertions(+) create mode 100644 benchmarks/ezbench.d/gem_exec_fault.test create mode 100644 benchmarks/gem_exec_fault.c diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources index 81607a56..bc4f2b52 100644 --- a/benchmarks/Makefile.sources +++ b/benchmarks/Makefile.sources @@ -8,6 +8,7 @@ benchmarks_PROGRAMS = \ gem_blt \ gem_create \ gem_exec_ctx \ + gem_exec_fault \ gem_exec_nop \ gem_exec_reloc \ gem_exec_trace \ diff --git a/benchmarks/ezbench.d/gem_exec_fault.test b/benchmarks/ezbench.d/gem_exec_fault.test new file mode 100644 index 00000000..e9f2fcdd --- /dev/null +++ b/benchmarks/ezbench.d/gem_exec_fault.test @@ -0,0 +1,22 @@ +[ -e $IGT_BENCHMARKS/gem_exec_fault ] || return 1 +sudo -n true || return 1 + +ring=rcs # rcs bcs vcs vecs all +while read x size; do + name="gem:exec:fault:$size" + test_name="$test_name $name" + eval "${name}_run() { sudo $IGT_BENCHMARKS/gem_exec_fault -s $x -e $ring -r \$1 ; }" + + name="gem:exec:fault:$size:forked" + test_name="$test_name $name" + eval "${name}_run() { sudo $IGT_BENCHMARKS/gem_exec_fault -f -s $x -e $ring -r \$1 ; }" +done< + * + */ + +/* Measure the time it takes to bind/unbind objects from the ppGTT */ + +#include +#include +#include
+#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "drm.h" +#include "ioctl_wrappers.h" +#include "drmtest.h" +#include "intel_io.h" +#include "igt_stats.h" + +#define LOCAL_I915_EXEC_NO_RELOC (1<<11) +#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12) + +#define LOCAL_I915_EXEC_BSD_SHIFT (13) +#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT) + +#define ENGINE_FLAGS (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK) + +static double elapsed(const struct timespec *start, + const struct timespec *end) +{ + return (end->tv_sec - start->tv_sec) + 1e-9*(end->tv_nsec - start->tv_nsec); +} + +static uint32_t batch(int fd, int size) +{ + const uint32_t buf[] = {MI_BATCH_BUFFER_END}; + uint32_t handle = gem_create(fd, size); + gem_write(fd, handle, 0, buf, sizeof(buf)); + return handle; +} + +static int loop(int size, unsigned ring, int reps, int ncpus, unsigned flags) +{ + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 obj; + unsigned engines[16]; + unsigned nengine; + double *shared; + int fd; + + shared = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); + + fd = drm_open_driver(DRIVER_INTEL); + + memset(&obj, 0, sizeof(obj)); + obj.handle = batch(fd, size); + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = (uintptr_t)&obj; + execbuf.buffer_count = 1; + execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT; + execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC; + if (__gem_execbuf(fd, &execbuf)) { + execbuf.flags = 0; + if (__gem_execbuf(fd, &execbuf)) + return 77; + } + gem_close(fd, obj.handle); + + nengine = 0; + if (ring == -1) { + for (ring = 1; ring < 16; ring++) { + execbuf.flags &= ~ENGINE_FLAGS; + execbuf.flags |= ring; + if (__gem_execbuf(fd, &execbuf) == 0) + engines[nengine++] = ring; + } + } else + engines[nengine++] = ring; + + while (reps--) { + memset(shared, 0, 4096); + + igt_fork(child, ncpus) { + struct timespec start, end; + unsigned count = 0; + + obj.handle = 
batch(fd, size); + + clock_gettime(CLOCK_MONOTONIC, &start); + do { + for (int inner = 0; inner < 1024; inner++) { + execbuf.flags &= ~ENGINE_FLAGS; + execbuf.flags |= engines[count++ % nengine]; + /* fault in */ + gem_execbuf(fd, &execbuf); + + /* fault out */ + obj.alignment = 1ull << 63; + __gem_execbuf(fd, &execbuf); + } + + clock_gettime(CLOCK_MONOTONIC, &end); + } while (elapsed(&start, &end) < 2.); + + gem_sync(fd, obj.handle); + clock_gettime(CLOCK_MONOTONIC, &end); + shared[child] = 1e6*elapsed(&start, &end) / count / 2; + + gem_close(fd, obj.handle); + } + igt_waitchildren(); + + for (int child = 0; child < ncpus; child++) + shared[ncpus] += shared[child]; + printf("%7.3f\n", shared[ncpus] / ncpus); + } + return 0; +} + +int main(int argc, char **argv) +{ + unsigned ring = I915_EXEC_RENDER; + unsigned flags = 0; + int size = 4096; + int reps = 1; + int ncpus = 1; + int c; + + while ((c = getopt (argc, argv, "e:r:s:f")) != -1) { + switch (c) { + case 'e': + if (strcmp(optarg, "rcs") == 0) + ring = I915_EXEC_RENDER; + else if (strcmp(optarg, "vcs") == 0) + ring = I915_EXEC_BSD; + else if (strcmp(optarg, "bcs") == 0) + ring = I915_EXEC_BLT; + else if (strcmp(optarg, "vecs") == 0) + ring = I915_EXEC_VEBOX; + else if (strcmp(optarg, "all") == 0) + ring = -1; + else + ring = atoi(optarg); + break; + + case 'r': + reps = atoi(optarg); + if (reps < 1) + reps = 1; + break; + + case 'f': + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + break; + + case 's': + size = atoi(optarg); + if (size < 4096) + size = 4096; + break; + + default: + break; + } + } + + return loop(size, ring, reps, ncpus, flags); +} -- cgit v1.2.3