diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2016-07-04 12:37:32 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2016-07-04 13:02:34 +0100 |
commit | d2c6196aaad63653596899d2aec40120c7194c71 (patch) | |
tree | 849909424b72bdba9dab31cc2d42f2e14bd93fe3 /tests/gem_sync.c | |
parent | c2893a90365d101d0a07de8bd5b882d53817bb19 (diff) |
igt/gem_sync: Extend with store synchronisation
Currently gem_sync exploits read-read optimisation to wait upon multiple
rings simultaneously. But at the moment, gem_exec_flush is showing
sporadic missed interrupts on bdw/skl and yet gem_sync is not. This suggests
some subtlety in the timing, perhaps caused by the extra write. This set
of tests tries to exercise that by using a write batch - which also
means we exercise inter-ring synchronisation (like gem_storedw_loop).
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'tests/gem_sync.c')
-rw-r--r-- | tests/gem_sync.c | 281 |
1 file changed, 281 insertions, 0 deletions
diff --git a/tests/gem_sync.c b/tests/gem_sync.c index 937c6eb2..8a19f23f 100644 --- a/tests/gem_sync.c +++ b/tests/gem_sync.c @@ -25,6 +25,14 @@ #include "igt.h" +#define LOCAL_I915_EXEC_NO_RELOC (1<<11) +#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12) + +#define LOCAL_I915_EXEC_BSD_SHIFT (13) +#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT) + +#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK) + IGT_TEST_DESCRIPTION("Basic check of ring<->ring write synchronisation."); /* @@ -65,6 +73,11 @@ out: return ts.tv_sec + 1e-9*ts.tv_nsec; } +static bool can_mi_store_dword(int gen, unsigned engine) +{ + return !(gen == 6 && (engine & ~(3<<13)) == I915_EXEC_BSD); +} + static void sync_ring(int fd, unsigned ring, int num_children) { @@ -139,6 +152,134 @@ sync_ring(int fd, unsigned ring, int num_children) } static void +store_ring(int fd, unsigned ring, int num_children) +{ + const int gen = intel_gen(intel_get_drm_devid(fd)); + unsigned engines[16]; + const char *names[16]; + int num_engines = 0; + + if (ring == ~0u) { + const struct intel_execution_engine *e; + + for (e = intel_execution_engines; e->name; e++) { + if (e->exec_id == 0) + continue; + + if (!gem_has_ring(fd, e->exec_id | e->flags)) + continue; + + if (!can_mi_store_dword(gen, e->exec_id)) + continue; + + if (e->exec_id == I915_EXEC_BSD) { + int is_bsd2 = e->flags != 0; + if (gem_has_bsd2(fd) != is_bsd2) + continue; + } + + names[num_engines] = e->name; + engines[num_engines++] = e->exec_id | e->flags; + if (num_engines == ARRAY_SIZE(engines)) + break; + } + + num_children *= num_engines; + } else { + gem_require_ring(fd, ring); + names[num_engines] = NULL; + engines[num_engines++] = ring; + } + + intel_detect_and_clear_missed_interrupts(fd); + igt_fork(child, num_children) { + const uint32_t bbe = MI_BATCH_BUFFER_END; + struct drm_i915_gem_exec_object2 object[2]; + struct drm_i915_gem_relocation_entry reloc[1024]; + struct drm_i915_gem_execbuffer2 execbuf; + double start, 
elapsed; + unsigned long cycles; + uint32_t *batch, *b; + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = (uintptr_t)object; + execbuf.flags = engines[child % num_engines]; + execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC; + execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT; + if (gen < 6) + execbuf.flags |= I915_EXEC_SECURE; + + memset(object, 0, sizeof(object)); + object[0].handle = gem_create(fd, 4096); + gem_write(fd, object[0].handle, 0, &bbe, sizeof(bbe)); + execbuf.buffer_count = 1; + gem_execbuf(fd, &execbuf); + + object[0].flags |= EXEC_OBJECT_WRITE; + object[1].handle = gem_create(fd, 20*1024); + + object[1].relocs_ptr = (uintptr_t)reloc; + object[1].relocation_count = 1024; + + batch = gem_mmap__cpu(fd, object[1].handle, 0, 20*1024, + PROT_WRITE | PROT_READ); + gem_set_domain(fd, object[1].handle, + I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); + + memset(reloc, 0, sizeof(reloc)); + b = batch; + for (int i = 0; i < 1024; i++) { + uint64_t offset; + + reloc[i].presumed_offset = object[0].offset; + reloc[i].offset = (b - batch + 1) * sizeof(*batch); + reloc[i].delta = i * sizeof(uint32_t); + reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION; + reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION; + + offset = object[0].offset + reloc[i].delta; + *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 
1 << 22 : 0); + if (gen >= 8) { + *b++ = offset; + *b++ = offset >> 32; + } else if (gen >= 4) { + *b++ = 0; + *b++ = offset; + reloc[i].offset += sizeof(*batch); + } else { + b[-1] -= 1; + *b++ = offset; + } + *b++ = i; + } + *b++ = MI_BATCH_BUFFER_END; + igt_assert((b - batch)*sizeof(uint32_t) < 20*1024); + munmap(batch, 20*1024); + execbuf.buffer_count = 2; + gem_execbuf(fd, &execbuf); + gem_sync(fd, object[1].handle); + + start = gettime(); + cycles = 0; + do { + do { + gem_execbuf(fd, &execbuf); + gem_sync(fd, object[1].handle); + } while (++cycles & 1023); + } while ((elapsed = gettime() - start) < SLOW_QUICK(10, 1)); + igt_info("%s%sompleted %ld cycles: %.3f us\n", + names[child % num_engines] ?: "", + names[child % num_engines] ? " c" : "C", + cycles, elapsed*1e6/cycles); + + gem_close(fd, object[1].handle); + gem_close(fd, object[0].handle); + } + igt_waitchildren_timeout(20, NULL); + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); +} + +static void sync_all(int fd, int num_children) { const struct intel_execution_engine *e; @@ -201,6 +342,134 @@ sync_all(int fd, int num_children) igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); } +static void xchg(void *array, unsigned i, unsigned j) +{ + uint32_t *u32 = array; + uint32_t tmp = u32[i]; + u32[i] = u32[j]; + u32[j] = tmp; +} + +static void +store_all(int fd, int num_children) +{ + const int gen = intel_gen(intel_get_drm_devid(fd)); + const struct intel_execution_engine *e; + unsigned engines[16]; + int num_engines = 0; + + for (e = intel_execution_engines; e->name; e++) { + if (e->exec_id == 0) + continue; + + if (!gem_has_ring(fd, e->exec_id | e->flags)) + continue; + + if (!can_mi_store_dword(gen, e->exec_id)) + continue; + + if (e->exec_id == I915_EXEC_BSD) { + int is_bsd2 = e->flags != 0; + if (gem_has_bsd2(fd) != is_bsd2) + continue; + } + + engines[num_engines++] = e->exec_id | e->flags; + if (num_engines == ARRAY_SIZE(engines)) + break; + } + igt_require(num_engines); + 
+ intel_detect_and_clear_missed_interrupts(fd); + igt_fork(child, num_children) { + const uint32_t bbe = MI_BATCH_BUFFER_END; + struct drm_i915_gem_exec_object2 object[2]; + struct drm_i915_gem_relocation_entry reloc[1024]; + struct drm_i915_gem_execbuffer2 execbuf; + double start, elapsed; + unsigned long cycles; + uint32_t *batch, *b; + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = (uintptr_t)object; + execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC; + execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT; + if (gen < 6) + execbuf.flags |= I915_EXEC_SECURE; + + memset(object, 0, sizeof(object)); + object[0].handle = gem_create(fd, 4096); + gem_write(fd, object[0].handle, 0, &bbe, sizeof(bbe)); + execbuf.buffer_count = 1; + gem_execbuf(fd, &execbuf); + + object[0].flags |= EXEC_OBJECT_WRITE; + object[1].handle = gem_create(fd, 1024*16 + 4096); + + object[1].relocs_ptr = (uintptr_t)reloc; + object[1].relocation_count = 1024; + + batch = gem_mmap__cpu(fd, object[1].handle, 0, 16*1024 + 4096, + PROT_WRITE | PROT_READ); + gem_set_domain(fd, object[1].handle, + I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); + + memset(reloc, 0, sizeof(reloc)); + b = batch; + for (int i = 0; i < 1024; i++) { + uint64_t offset; + + reloc[i].presumed_offset = object[0].offset; + reloc[i].offset = (b - batch + 1) * sizeof(*batch); + reloc[i].delta = i * sizeof(uint32_t); + reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION; + reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION; + + offset = object[0].offset + reloc[i].delta; + *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 
1 << 22 : 0); + if (gen >= 8) { + *b++ = offset; + *b++ = offset >> 32; + } else if (gen >= 4) { + *b++ = 0; + *b++ = offset; + reloc[i].offset += sizeof(*batch); + } else { + b[-1] -= 1; + *b++ = offset; + } + *b++ = i; + } + *b++ = MI_BATCH_BUFFER_END; + igt_assert((b - batch)*sizeof(uint32_t) < 20*1024); + munmap(batch, 16*1024+4096); + execbuf.buffer_count = 2; + gem_execbuf(fd, &execbuf); + gem_sync(fd, object[1].handle); + + start = gettime(); + cycles = 0; + do { + do { + igt_permute_array(engines, num_engines, xchg); + for (int n = 0; n < num_engines; n++) { + execbuf.flags &= ~ENGINE_MASK; + execbuf.flags |= engines[n]; + gem_execbuf(fd, &execbuf); + } + gem_sync(fd, object[1].handle); + } while (++cycles & 1023); + } while ((elapsed = gettime() - start) < SLOW_QUICK(10, 1)); + igt_info("Completed %ld cycles: %.3f us\n", + cycles, elapsed*1e6/cycles); + + gem_close(fd, object[1].handle); + gem_close(fd, object[0].handle); + } + igt_waitchildren_timeout(20, NULL); + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); +} + igt_main { const struct intel_execution_engine *e; @@ -218,19 +487,31 @@ igt_main for (e = intel_execution_engines; e->name; e++) { igt_subtest_f("%s", e->name) sync_ring(fd, e->exec_id | e->flags, 1); + igt_subtest_f("store-%s", e->name) + store_ring(fd, e->exec_id | e->flags, 1); igt_subtest_f("forked-%s", e->name) sync_ring(fd, e->exec_id | e->flags, ncpus); + igt_subtest_f("forked-store-%s", e->name) + store_ring(fd, e->exec_id | e->flags, ncpus); } igt_subtest("basic-each") sync_ring(fd, ~0u, 1); + igt_subtest("basic-store-each") + store_ring(fd, ~0u, 1); igt_subtest("forked-each") sync_ring(fd, ~0u, ncpus); + igt_subtest("forked-store-each") + store_ring(fd, ~0u, ncpus); igt_subtest("basic-all") sync_all(fd, 1); + igt_subtest("basic-store-all") + store_all(fd, 1); igt_subtest("forked-all") sync_all(fd, ncpus); + igt_subtest("forked-store-all") + store_all(fd, ncpus); igt_fixture { igt_stop_hang_detector(); |