diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2016-07-04 12:37:32 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2016-07-04 13:02:34 +0100 |
commit | d2c6196aaad63653596899d2aec40120c7194c71 (patch) | |
tree | 849909424b72bdba9dab31cc2d42f2e14bd93fe3 /tests/gem_sync.c | |
parent | c2893a90365d101d0a07de8bd5b882d53817bb19 (diff) |
igt/gem_sync: Extend with store synchronisation
Currently gem_sync exploits read-read optimisation to wait upon multiple
rings simultaneously. But at the moment, gem_exec_flush is showing
sporadic missed interrupts on bdw/skl and yet gem_sync is not. This suggests
some subtlety in the timing, perhaps caused by the extra write. This set
of tests tries to exercise that by using a write batch - which also
means we exercise inter-ring synchronisation (like gem_storedw_loop).
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'tests/gem_sync.c')
-rw-r--r-- | tests/gem_sync.c | 281 |
1 file changed, 281 insertions, 0 deletions
diff --git a/tests/gem_sync.c b/tests/gem_sync.c index 937c6eb2..8a19f23f 100644 --- a/tests/gem_sync.c +++ b/tests/gem_sync.c @@ -25,6 +25,14 @@ #include "igt.h" +#define LOCAL_I915_EXEC_NO_RELOC (1<<11) +#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12) + +#define LOCAL_I915_EXEC_BSD_SHIFT (13) +#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT) + +#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK) + IGT_TEST_DESCRIPTION("Basic check of ring<->ring write synchronisation."); /* @@ -65,6 +73,11 @@ out: return ts.tv_sec + 1e-9*ts.tv_nsec; } +static bool can_mi_store_dword(int gen, unsigned engine) +{ + return !(gen == 6 && (engine & ~(3<<13)) == I915_EXEC_BSD); +} + static void sync_ring(int fd, unsigned ring, int num_children) { @@ -139,6 +152,134 @@ sync_ring(int fd, unsigned ring, int num_children) } static void +store_ring(int fd, unsigned ring, int num_children) +{ + const int gen = intel_gen(intel_get_drm_devid(fd)); + unsigned engines[16]; + const char *names[16]; + int num_engines = 0; + + if (ring == ~0u) { + const struct intel_execution_engine *e; + + for (e = intel_execution_engines; e->name; e++) { + if (e->exec_id == 0) + continue; + + if (!gem_has_ring(fd, e->exec_id | e->flags)) + continue; + + if (!can_mi_store_dword(gen, e->exec_id)) + continue; + + if (e->exec_id == I915_EXEC_BSD) { + int is_bsd2 = e->flags != 0; + if (gem_has_bsd2(fd) != is_bsd2) + continue; + } + + names[num_engines] = e->name; + engines[num_engines++] = e->exec_id | e->flags; + if (num_engines == ARRAY_SIZE(engines)) + break; + } + + num_children *= num_engines; + } else { + gem_require_ring(fd, ring); + names[num_engines] = NULL; + engines[num_engines++] = ring; + } + + intel_detect_and_clear_missed_interrupts(fd); + igt_fork(child, num_children) { + const uint32_t bbe = MI_BATCH_BUFFER_END; + struct drm_i915_gem_exec_object2 object[2]; + struct drm_i915_gem_relocation_entry reloc[1024]; + struct drm_i915_gem_execbuffer2 execbuf; + double start, 
elapsed; + unsigned long cycles; + uint32_t *batch, *b; + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = (uintptr_t)object; + execbuf.flags = engines[child % num_engines]; + execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC; + execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT; + if (gen < 6) + execbuf.flags |= I915_EXEC_SECURE; + + memset(object, 0, sizeof(object)); + object[0].handle = gem_create(fd, 4096); + gem_write(fd, object[0].handle, 0, &bbe, sizeof(bbe)); + execbuf.buffer_count = 1; + gem_execbuf(fd, &execbuf); + + object[0].flags |= EXEC_OBJECT_WRITE; + object[1].handle = gem_create(fd, 20*1024); + + object[1].relocs_ptr = (uintptr_t)reloc; + object[1].relocation_count = 1024; + + batch = gem_mmap__cpu(fd, object[1].handle, 0, 20*1024, + PROT_WRITE | PROT_READ); + gem_set_domain(fd, object[1].handle, + I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); + + memset(reloc, 0, sizeof(reloc)); + b = batch; + for (int i = 0; i < 1024; i++) { + uint64_t offset; + + reloc[i].presumed_offset = object[0].offset; + reloc[i].offset = (b - batch + 1) * sizeof(*batch); + reloc[i].delta = i * sizeof(uint32_t); + reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION; + reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION; + + offset = object[0].offset + reloc[i].delta; + *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 
1 << 22 : 0); + if (gen >= 8) { + *b++ = offset; + *b++ = offset >> 32; + } else if (gen >= 4) { + *b++ = 0; + *b++ = offset; + reloc[i].offset += sizeof(*batch); + } else { + b[-1] -= 1; + *b++ = offset; + } + *b++ = i; + } + *b++ = MI_BATCH_BUFFER_END; + igt_assert((b - batch)*sizeof(uint32_t) < 20*1024); + munmap(batch, 20*1024); + execbuf.buffer_count = 2; + gem_execbuf(fd, &execbuf); + gem_sync(fd, object[1].handle); + + start = gettime(); + cycles = 0; + do { + do { + gem_execbuf(fd, &execbuf); + gem_sync(fd, object[1].handle); + } while (++cycles & 1023); + } while ((elapsed = gettime() - start) < SLOW_QUICK(10, 1)); + igt_info("%s%sompleted %ld cycles: %.3f us\n", + names[child % num_engines] ?: "", + names[child % num_engines] ? " c" : "C", + cycles, elapsed*1e6/cycles); + + gem_close(fd, object[1].handle); + gem_close(fd, object[0].handle); + } + igt_waitchildren_timeout(20, NULL); + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); +} + +static void sync_all(int fd, int num_children) { const struct intel_execution_engine *e; @@ -201,6 +342,134 @@ sync_all(int fd, int num_children) igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); } +static void xchg(void *array, unsigned i, unsigned j) +{ + uint32_t *u32 = array; + uint32_t tmp = u32[i]; + u32[i] = u32[j]; + u32[j] = tmp; +} + +static void +store_all(int fd, int num_children) +{ + const int gen = intel_gen(intel_get_drm_devid(fd)); + const struct intel_execution_engine *e; + unsigned engines[16]; + int num_engines = 0; + + for (e = intel_execution_engines; e->name; e++) { + if (e->exec_id == 0) + continue; + + if (!gem_has_ring(fd, e->exec_id | e->flags)) + continue; + + if (!can_mi_store_dword(gen, e->exec_id)) + continue; + + if (e->exec_id == I915_EXEC_BSD) { + int is_bsd2 = e->flags != 0; + if (gem_has_bsd2(fd) != is_bsd2) + continue; + } + + engines[num_engines++] = e->exec_id | e->flags; + if (num_engines == ARRAY_SIZE(engines)) + break; + } + igt_require(num_engines); + 
+ intel_detect_and_clear_missed_interrupts(fd); + igt_fork(child, num_children) { + const uint32_t bbe = MI_BATCH_BUFFER_END; + struct drm_i915_gem_exec_object2 object[2]; + struct drm_i915_gem_relocation_entry reloc[1024]; + struct drm_i915_gem_execbuffer2 execbuf; + double start, elapsed; + unsigned long cycles; + uint32_t *batch, *b; + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = (uintptr_t)object; + execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC; + execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT; + if (gen < 6) + execbuf.flags |= I915_EXEC_SECURE; + + memset(object, 0, sizeof(object)); + object[0].handle = gem_create(fd, 4096); + gem_write(fd, object[0].handle, 0, &bbe, sizeof(bbe)); + execbuf.buffer_count = 1; + gem_execbuf(fd, &execbuf); + + object[0].flags |= EXEC_OBJECT_WRITE; + object[1].handle = gem_create(fd, 1024*16 + 4096); + + object[1].relocs_ptr = (uintptr_t)reloc; + object[1].relocation_count = 1024; + + batch = gem_mmap__cpu(fd, object[1].handle, 0, 16*1024 + 4096, + PROT_WRITE | PROT_READ); + gem_set_domain(fd, object[1].handle, + I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); + + memset(reloc, 0, sizeof(reloc)); + b = batch; + for (int i = 0; i < 1024; i++) { + uint64_t offset; + + reloc[i].presumed_offset = object[0].offset; + reloc[i].offset = (b - batch + 1) * sizeof(*batch); + reloc[i].delta = i * sizeof(uint32_t); + reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION; + reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION; + + offset = object[0].offset + reloc[i].delta; + *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 
1 << 22 : 0); + if (gen >= 8) { + *b++ = offset; + *b++ = offset >> 32; + } else if (gen >= 4) { + *b++ = 0; + *b++ = offset; + reloc[i].offset += sizeof(*batch); + } else { + b[-1] -= 1; + *b++ = offset; + } + *b++ = i; + } + *b++ = MI_BATCH_BUFFER_END; + igt_assert((b - batch)*sizeof(uint32_t) < 20*1024); + munmap(batch, 16*1024+4096); + execbuf.buffer_count = 2; + gem_execbuf(fd, &execbuf); + gem_sync(fd, object[1].handle); + + start = gettime(); + cycles = 0; + do { + do { + igt_permute_array(engines, num_engines, xchg); + for (int n = 0; n < num_engines; n++) { + execbuf.flags &= ~ENGINE_MASK; + execbuf.flags |= engines[n]; + gem_execbuf(fd, &execbuf); + } + gem_sync(fd, object[1].handle); + } while (++cycles & 1023); + } while ((elapsed = gettime() - start) < SLOW_QUICK(10, 1)); + igt_info("Completed %ld cycles: %.3f us\n", + cycles, elapsed*1e6/cycles); + + gem_close(fd, object[1].handle); + gem_close(fd, object[0].handle); + } + igt_waitchildren_timeout(20, NULL); + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); +} + igt_main { const struct intel_execution_engine *e; @@ -218,19 +487,31 @@ igt_main for (e = intel_execution_engines; e->name; e++) { igt_subtest_f("%s", e->name) sync_ring(fd, e->exec_id | e->flags, 1); + igt_subtest_f("store-%s", e->name) + store_ring(fd, e->exec_id | e->flags, 1); igt_subtest_f("forked-%s", e->name) sync_ring(fd, e->exec_id | e->flags, ncpus); + igt_subtest_f("forked-store-%s", e->name) + store_ring(fd, e->exec_id | e->flags, ncpus); } igt_subtest("basic-each") sync_ring(fd, ~0u, 1); + igt_subtest("basic-store-each") + store_ring(fd, ~0u, 1); igt_subtest("forked-each") sync_ring(fd, ~0u, ncpus); + igt_subtest("forked-store-each") + store_ring(fd, ~0u, ncpus); igt_subtest("basic-all") sync_all(fd, 1); + igt_subtest("basic-store-all") + store_all(fd, 1); igt_subtest("forked-all") sync_all(fd, ncpus); + igt_subtest("forked-store-all") + store_all(fd, ncpus); igt_fixture { igt_stop_hang_detector(); |