summary | refs | log | tree | commit | diff
path: root/tests/i915/gem_ctx_switch.c
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2019-02-23 00:32:22 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk> 2019-02-26 16:13:30 +0000
commit: f308fb6efa8045a6e97340a49758a2d593e3cb60 (patch)
tree: c827f3a51b60b6167a581ac0867fb949a9521f31 /tests/i915/gem_ctx_switch.c
parent: 25911cdde500aa6ddede601faec91741c6963c27 (diff)
i915/gem_ctx_switch: Use minimum qlen over all engines and measure switches
Not all engines are created equal, and our weighting ends up favouring the many faster xCS rings at the expense of RCS. Our qlen estimation also failed to factor in the context switch overhead, which is a significant factor for nop batches. So we oversubscribe the number of batches submitted to RCS and end up waiting for those to complete at the end of our subtest timeslice.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Caz Yokoyama <caz.yokoyama@intel.com>
Reviewed-by: Caz Yokoyama <caz.yokoyama@intel.com>
Diffstat (limited to 'tests/i915/gem_ctx_switch.c')
-rw-r--r-- tests/i915/gem_ctx_switch.c 39
1 files changed, 31 insertions, 8 deletions
diff --git a/tests/i915/gem_ctx_switch.c b/tests/i915/gem_ctx_switch.c
index 1208cb8d..87e13b91 100644
--- a/tests/i915/gem_ctx_switch.c
+++ b/tests/i915/gem_ctx_switch.c
@@ -26,6 +26,7 @@
*/
#include "igt.h"
+#include <limits.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
@@ -58,29 +59,50 @@ static int measure_qlen(int fd,
{
const struct drm_i915_gem_exec_object2 * const obj =
(struct drm_i915_gem_exec_object2 *)(uintptr_t)execbuf->buffers_ptr;
- int qlen = 64;
+ uint32_t ctx[64];
+ int min = INT_MAX, max = 0;
+
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+ ctx[i] = gem_context_create(fd);
for (unsigned int n = 0; n < nengine; n++) {
uint64_t saved = execbuf->flags;
struct timespec tv = {};
+ int q;
execbuf->flags |= engine[n];
- igt_nsec_elapsed(&tv);
- for (int loop = 0; loop < qlen; loop++)
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+ execbuf->rsvd1 = ctx[i];
gem_execbuf(fd, execbuf);
+ }
gem_sync(fd, obj->handle);
- execbuf->flags = saved;
+ igt_nsec_elapsed(&tv);
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+ execbuf->rsvd1 = ctx[i];
+ gem_execbuf(fd, execbuf);
+ }
+ gem_sync(fd, obj->handle);
/*
* Be conservative and aim not to overshoot timeout, so scale
* down by 8 for hopefully a max of 12.5% error.
*/
- qlen = qlen * timeout * 1e9 / igt_nsec_elapsed(&tv) / 8 + 1;
+ q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) / 8 + 1;
+ if (q < min)
+ min = q;
+ if (q > max)
+ max = q;
+
+ execbuf->flags = saved;
}
- return qlen;
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+ gem_context_destroy(fd, ctx[i]);
+
+ igt_debug("Estimated qlen: {min:%d, max:%d}\n", min, max);
+ return min;
}
static void single(int fd, uint32_t handle,
@@ -259,9 +281,10 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
clock_gettime(CLOCK_MONOTONIC, &now);
gem_close(fd, obj[0].handle);
- igt_info("[%d:%d] %s: %'u cycles: %.3fus%s\n",
+ igt_info("[%d:%d] %s: %'u cycles: %.3fus%s (elapsed: %.3fs)\n",
nctx, child, name[child], count, elapsed(&start, &now)*1e6 / count,
- flags & INTERRUPTIBLE ? " (interruptible)" : "");
+ flags & INTERRUPTIBLE ? " (interruptible)" : "",
+ elapsed(&start, &now));
}
igt_waitchildren();
}