From 741bf7064c467df725c14cc0b3b8b50436f9ee09 Mon Sep 17 00:00:00 2001
From: Arkadiusz Hiler
Date: Thu, 18 Oct 2018 14:06:42 +0300
Subject: tests: Introduce i915 directory

We can already move all the tests with distinct prefixes: gem_, gen3_
and i915_. pm_ and drv_ tests will follow in batches, so we can do the
adjustments in the reporting/filtering layer of the CI system.

v2: Fix test-list.txt generation with meson
v3: Fix docs build (Petri)

Cc: Chris Wilson
Cc: Petri Latvala
Cc: Martin Peres
Signed-off-by: Arkadiusz Hiler
Reviewed-by: Petri Latvala
Tested-by: Petri Latvala
Acked-by: Daniel Vetter
Acked-by: Chris Wilson
---
 tests/i915/gem_exec_latency.c | 721 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 721 insertions(+)
 create mode 100644 tests/i915/gem_exec_latency.c

diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c
new file mode 100644
index 00000000..de16322a
--- /dev/null
+++ b/tests/i915/gem_exec_latency.c
@@ -0,0 +1,721 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sched.h>
+#include <math.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+
+#include "drm.h"
+
+#include "igt_sysfs.h"
+#include "igt_vgem.h"
+#include "igt_dummyload.h"
+#include "igt_stats.h"
+
+#include "i915/gem_ring.h"
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define LOCAL_I915_EXEC_BSD_SHIFT      (13)
+#define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define ENGINE_FLAGS  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+#define CORK 0x1
+#define PREEMPT 0x2
+
+static unsigned int ring_size;
+
+static void
+poll_ring(int fd, unsigned ring, const char *name)
+{
+	const struct igt_spin_factory opts = {
+		.engine = ring,
+		.flags = IGT_SPIN_POLL_RUN | IGT_SPIN_FAST,
+	};
+	struct timespec tv = {};
+	unsigned long cycles;
+	igt_spin_t *spin[2];
+	uint64_t elapsed;
+	uint32_t cmd;
+
+	gem_require_ring(fd, ring);
+	igt_require(gem_can_store_dword(fd, ring));
+
+	spin[0] = __igt_spin_batch_factory(fd, &opts);
+	igt_assert(spin[0]->running);
+	cmd = *spin[0]->batch;
+
+	spin[1] = __igt_spin_batch_factory(fd, &opts);
+	igt_assert(spin[1]->running);
+	igt_assert(cmd == *spin[1]->batch);
+
+	igt_spin_batch_end(spin[0]);
+	while (!READ_ONCE(*spin[1]->running))
+		;
+	igt_assert(!gem_bo_busy(fd, spin[0]->handle));
+
+	cycles = 0;
+	while ((elapsed = igt_nsec_elapsed(&tv)) < 2ull << 30) {
+		unsigned int idx = cycles++ & 1;
+
+		*spin[idx]->batch = cmd;
+		*spin[idx]->running = 0;
+		gem_execbuf(fd, &spin[idx]->execbuf);
+
+		igt_spin_batch_end(spin[!idx]);
+		while (!READ_ONCE(*spin[idx]->running))
+			;
+	}
+
+	igt_info("%s completed %ld cycles: %.3f us\n",
+		 name, cycles, elapsed*1e-3/cycles);
+
+	igt_spin_batch_free(fd, spin[1]);
+	igt_spin_batch_free(fd, spin[0]);
+}
+
+#define RCS_TIMESTAMP (0x2000 + 0x358)
+static void latency_on_ring(int fd,
+			    unsigned ring, const char *name,
+			    unsigned flags)
+{
+	const int gen = intel_gen(intel_get_drm_devid(fd));
+	const int has_64bit_reloc = gen >= 8;
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_execbuffer2 execbuf;
+	IGT_CORK_HANDLE(c);
+	volatile uint32_t *reg;
+	unsigned repeats = ring_size;
+	uint32_t start, end, *map, *results;
+	uint64_t offset;
+	double gpu_latency;
+	int i, j;
+
+	reg = (volatile uint32_t *)((volatile char *)igt_global_mmio + RCS_TIMESTAMP);
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(&obj[1]);
+	execbuf.buffer_count = 2;
+	execbuf.flags = ring;
+	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT;
+
+	memset(obj, 0, sizeof(obj));
+	obj[1].handle = gem_create(fd, 4096);
+	obj[1].flags = EXEC_OBJECT_WRITE;
+	results = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_READ);
+
+	obj[2].handle = gem_create(fd, 64*1024);
+	map = gem_mmap__wc(fd, obj[2].handle, 0, 64*1024, PROT_WRITE);
+	gem_set_domain(fd, obj[2].handle,
+		       I915_GEM_DOMAIN_GTT,
+		       I915_GEM_DOMAIN_GTT);
+	map[0] = MI_BATCH_BUFFER_END;
+	gem_execbuf(fd, &execbuf);
+
+	memset(&reloc, 0, sizeof(reloc));
+	obj[2].relocation_count = 1;
+	obj[2].relocs_ptr = to_user_pointer(&reloc);
+
+	gem_set_domain(fd, obj[2].handle,
+		       I915_GEM_DOMAIN_GTT,
+		       I915_GEM_DOMAIN_GTT);
+
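+	/*
+	 * With HANDLE_LUT, target_handle is an index into the execbuffer
+	 * object array: plugging the cork prepends obj[0], pushing the
+	 * results buffer from slot 0 to slot 1.
+	 */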
+	reloc.target_handle = flags & CORK ? 1 : 0;
+	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+	reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+	reloc.presumed_offset = obj[1].offset;
+
+	for (j = 0; j < repeats; j++) {
+		execbuf.batch_start_offset = 64 * j;
+		reloc.offset =
+			execbuf.batch_start_offset + sizeof(uint32_t);
+		reloc.delta = sizeof(uint32_t) * j;
+
+		offset = reloc.presumed_offset;
+		offset += reloc.delta;
+
+		i = 16 * j;
+		/* MI_STORE_REG_MEM */
+		map[i++] = 0x24 << 23 | 1;
+		if (has_64bit_reloc)
+			map[i-1]++;
+		map[i++] = RCS_TIMESTAMP; /* ring local! */
+		map[i++] = offset;
+		if (has_64bit_reloc)
+			map[i++] = offset >> 32;
+		map[i++] = MI_BATCH_BUFFER_END;
+	}
+
+	if (flags & CORK) {
+		obj[0].handle = igt_cork_plug(&c, fd);
+		execbuf.buffers_ptr = to_user_pointer(&obj[0]);
+		execbuf.buffer_count = 3;
+	}
+
+	start = *reg;
+	for (j = 0; j < repeats; j++) {
+		uint64_t presumed_offset = reloc.presumed_offset;
+
+		execbuf.batch_start_offset = 64 * j;
+		reloc.offset =
+			execbuf.batch_start_offset + sizeof(uint32_t);
+		reloc.delta = sizeof(uint32_t) * j;
+
+		gem_execbuf(fd, &execbuf);
+		igt_assert(reloc.presumed_offset == presumed_offset);
+	}
+	end = *reg;
+	igt_assert(reloc.presumed_offset == obj[1].offset);
+
+	if (flags & CORK)
+		igt_cork_unplug(&c);
+
+	gem_set_domain(fd, obj[1].handle, I915_GEM_DOMAIN_GTT, 0);
+	gpu_latency = (results[repeats-1] - results[0]) / (double)(repeats-1);
+
+	gem_set_domain(fd, obj[2].handle,
+		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+	execbuf.batch_start_offset = 0;
+	for (j = 0; j < repeats - 1; j++) {
+		offset = obj[2].offset;
+		offset += 64 * (j + 1);
+
+		i = 16 * j + (has_64bit_reloc ? 4 : 3);
+		map[i] = MI_BATCH_BUFFER_START;
+		if (gen >= 8) {
+			map[i] |= 1 << 8 | 1;
+			map[i + 1] = offset;
+			map[i + 2] = offset >> 32;
+		} else if (gen >= 6) {
+			map[i] |= 1 << 8;
+			map[i + 1] = offset;
+		} else {
+			map[i] |= 2 << 6;
+			map[i + 1] = offset;
+			if (gen < 4)
+				map[i] |= 1;
+		}
+	}
+	offset = obj[2].offset;
+	gem_execbuf(fd, &execbuf);
+	igt_assert(offset == obj[2].offset);
+
+	gem_set_domain(fd, obj[1].handle, I915_GEM_DOMAIN_GTT, 0);
+	igt_info("%s: dispatch latency: %.2f, execution latency: %.2f (target %.2f)\n",
+		 name,
+		 (end - start) / (double)repeats,
+		 gpu_latency, (results[repeats - 1] - results[0]) / (double)(repeats - 1));
+
+	munmap(map, 64*1024);
+	munmap(results, 4096);
+	if (flags & CORK)
+		gem_close(fd, obj[0].handle);
+	gem_close(fd, obj[1].handle);
+	gem_close(fd, obj[2].handle);
+}
+
+static void latency_from_ring(int fd,
+			      unsigned ring, const char *name,
+			      unsigned flags)
+{
+	const int gen = intel_gen(intel_get_drm_devid(fd));
+	const int has_64bit_reloc = gen >= 8;
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_execbuffer2 execbuf;
+	const unsigned int repeats = ring_size / 2;
+	unsigned int other;
+	uint32_t *map, *results;
+	uint32_t ctx[2] = {};
+	int i, j;
+
+	if (flags & PREEMPT) {
+		ctx[0] = gem_context_create(fd);
+		gem_context_set_priority(fd, ctx[0], -1023);
+
+		ctx[1] = gem_context_create(fd);
+		gem_context_set_priority(fd, ctx[1], 1023);
+	}
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(&obj[1]);
+	execbuf.buffer_count = 2;
+	execbuf.flags = ring;
+	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT;
+	execbuf.rsvd1 = ctx[1];
+
+	memset(obj, 0, sizeof(obj));
+	obj[1].handle = gem_create(fd, 4096);
+	obj[1].flags = EXEC_OBJECT_WRITE;
+	results = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_READ);
+
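+	/*
+	 * One 64-byte batch per submission; 64KiB of batch space covers
+	 * the 1024-batch cap placed on ring_size.
+	 */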
+	obj[2].handle = gem_create(fd, 64*1024);
+	map = gem_mmap__wc(fd, obj[2].handle, 0, 64*1024, PROT_WRITE);
+	gem_set_domain(fd, obj[2].handle,
+		       I915_GEM_DOMAIN_GTT,
+		       I915_GEM_DOMAIN_GTT);
+	map[0] = MI_BATCH_BUFFER_END;
+	gem_execbuf(fd, &execbuf);
+
+	memset(&reloc, 0, sizeof(reloc));
+	obj[2].relocation_count = 1;
+	obj[2].relocs_ptr = to_user_pointer(&reloc);
+
+	gem_set_domain(fd, obj[2].handle,
+		       I915_GEM_DOMAIN_GTT,
+		       I915_GEM_DOMAIN_GTT);
+
+	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+	reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+	reloc.presumed_offset = obj[1].offset;
+	reloc.target_handle = flags & CORK ? 1 : 0;
+
+	for_each_physical_engine(fd, other) {
+		igt_spin_t *spin = NULL;
+		IGT_CORK_HANDLE(c);
+
+		gem_set_domain(fd, obj[2].handle,
+			       I915_GEM_DOMAIN_GTT,
+			       I915_GEM_DOMAIN_GTT);
+
+		if (flags & PREEMPT)
+			spin = __igt_spin_batch_new(fd,
+						    .ctx = ctx[0],
+						    .engine = ring);
+
+		if (flags & CORK) {
+			obj[0].handle = igt_cork_plug(&c, fd);
+			execbuf.buffers_ptr = to_user_pointer(&obj[0]);
+			execbuf.buffer_count = 3;
+		}
+
+		for (j = 0; j < repeats; j++) {
+			uint64_t offset;
+
+			execbuf.flags &= ~ENGINE_FLAGS;
+			execbuf.flags |= ring;
+
+			execbuf.batch_start_offset = 64 * j;
+			reloc.offset =
+				execbuf.batch_start_offset + sizeof(uint32_t);
+			reloc.delta = sizeof(uint32_t) * j;
+
+			reloc.presumed_offset = obj[1].offset;
+			offset = reloc.presumed_offset;
+			offset += reloc.delta;
+
+			i = 16 * j;
+			/* MI_STORE_REG_MEM */
+			map[i++] = 0x24 << 23 | 1;
+			if (has_64bit_reloc)
+				map[i-1]++;
+			map[i++] = RCS_TIMESTAMP; /* ring local! */
+			map[i++] = offset;
+			if (has_64bit_reloc)
+				map[i++] = offset >> 32;
+			map[i++] = MI_BATCH_BUFFER_END;
+
+			gem_execbuf(fd, &execbuf);
+
+			execbuf.flags &= ~ENGINE_FLAGS;
+			execbuf.flags |= other;
+
+			execbuf.batch_start_offset = 64 * (j + repeats);
+			reloc.offset =
+				execbuf.batch_start_offset + sizeof(uint32_t);
+			reloc.delta = sizeof(uint32_t) * (j + repeats);
+
+			reloc.presumed_offset = obj[1].offset;
+			offset = reloc.presumed_offset;
+			offset += reloc.delta;
+
+			i = 16 * (j + repeats);
+			/* MI_STORE_REG_MEM */
+			map[i++] = 0x24 << 23 | 1;
+			if (has_64bit_reloc)
+				map[i-1]++;
+			map[i++] = RCS_TIMESTAMP; /* ring local! */
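+			/*
+			 * (TIMESTAMP sits at offset 0x358 from each engine's
+			 * mmio base; RCS_TIMESTAMP is the render engine's
+			 * copy, sampled from every ring for a common
+			 * timebase.)
+			 */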
+			map[i++] = offset;
+			if (has_64bit_reloc)
+				map[i++] = offset >> 32;
+			map[i++] = MI_BATCH_BUFFER_END;
+
+			gem_execbuf(fd, &execbuf);
+		}
+
+		if (flags & CORK)
+			igt_cork_unplug(&c);
+		gem_set_domain(fd, obj[1].handle,
+			       I915_GEM_DOMAIN_GTT,
+			       I915_GEM_DOMAIN_GTT);
+		igt_spin_batch_free(fd, spin);
+
+		igt_info("%s-%s delay: %.2f\n",
+			 name, e__->name,
+			 (results[2*repeats-1] - results[0]) / (double)repeats);
+	}
+
+	munmap(map, 64*1024);
+	munmap(results, 4096);
+
+	if (flags & CORK)
+		gem_close(fd, obj[0].handle);
+	gem_close(fd, obj[1].handle);
+	gem_close(fd, obj[2].handle);
+
+	if (flags & PREEMPT) {
+		gem_context_destroy(fd, ctx[1]);
+		gem_context_destroy(fd, ctx[0]);
+	}
+}
+
+static void __rearm_spin_batch(igt_spin_t *spin)
+{
+	const uint32_t mi_arb_chk = 0x5 << 23;
+
+	*spin->batch = mi_arb_chk;
+	*spin->running = 0;
+	__sync_synchronize();
+}
+
+static void
+__submit_spin_batch(int fd, igt_spin_t *spin, unsigned int flags)
+{
+	struct drm_i915_gem_execbuffer2 eb = spin->execbuf;
+
+	eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
+	eb.flags |= flags | I915_EXEC_NO_RELOC;
+
+	gem_execbuf(fd, &eb);
+}
+
+struct rt_pkt {
+	struct igt_mean mean;
+	double min, max;
+};
+
+static bool __spin_wait(int fd, igt_spin_t *spin)
+{
+	while (!READ_ONCE(*spin->running)) {
+		if (!gem_bo_busy(fd, spin->handle))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Test whether an RT thread which hogs the CPU a lot can submit work with
+ * reasonable latency.
+ */
+static void
+rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned int flags)
+#define RTIDLE 0x1
+{
+	const char *passname[] = {
+		"warmup",
+		"normal",
+		"rt[0]",
+		"rt[1]",
+		"rt[2]",
+		"rt[3]",
+		"rt[4]",
+		"rt[5]",
+		"rt[6]",
+	};
+#define NPASS ARRAY_SIZE(passname)
+#define MMAP_SZ (64 << 10)
+	const struct igt_spin_factory opts = {
+		.engine = engine,
+		.flags = IGT_SPIN_POLL_RUN | IGT_SPIN_FAST,
+	};
+	struct rt_pkt *results;
+	unsigned int engines[16];
+	const char *names[16];
+	unsigned int nengine;
+	int ret;
+
+	igt_assert(ARRAY_SIZE(engines) * NPASS * sizeof(*results) <= MMAP_SZ);
+	results = mmap(NULL, MMAP_SZ, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(results != MAP_FAILED);
+
+	nengine = 0;
+	if (engine == ALL_ENGINES) {
+		for_each_physical_engine(fd, engine) {
+			if (!gem_can_store_dword(fd, engine))
+				continue;
+
+			engines[nengine] = engine;
+			names[nengine] = e__->name;
+			nengine++;
+		}
+		igt_require(nengine > 1);
+	} else {
+		igt_require(gem_can_store_dword(fd, engine));
+		engines[nengine] = engine;
+		names[nengine] = name;
+		nengine++;
+	}
+
+	gem_quiescent_gpu(fd);
+
+	igt_fork(child, nengine) {
+		unsigned int pass = 0; /* Three phases: warmup, normal, rt. */
+
+		engine = engines[child];
+		do {
+			struct igt_mean mean;
+			double min = HUGE_VAL;
+			double max = -HUGE_VAL;
+			igt_spin_t *spin;
+
+			igt_mean_init(&mean);
+
+			if (pass == 2) {
+				struct sched_param rt =
+					{ .sched_priority = 99 };
+
+				ret = sched_setscheduler(0,
+							 SCHED_FIFO | SCHED_RESET_ON_FORK,
+							 &rt);
+				if (ret) {
+					igt_warn("Failed to set scheduling policy!\n");
+					break;
+				}
+			}
+
+			usleep(250);
+
+			spin = __igt_spin_batch_factory(fd, &opts);
+			if (!spin) {
+				igt_warn("Failed to create spinner! (%s)\n",
+					 passname[pass]);
+				break;
+			}
+			igt_spin_busywait_until_running(spin);
+
+			igt_until_timeout(pass > 0 ? 5 : 2) {
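+				/*
+				 * Pass 0 is only a short 2s warmup; the
+				 * measured passes each run for 5s.
+				 */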
+				struct timespec ts = { };
+				double t;
+
+				igt_spin_batch_end(spin);
+				gem_sync(fd, spin->handle);
+				if (flags & RTIDLE)
+					igt_drop_caches_set(fd, DROP_IDLE);
+
+				/*
+				 * If we are oversubscribed (more RT hogs than
+				 * cpus) give the others a chance to run;
+				 * otherwise, they will interrupt us in the
+				 * middle of the measurement.
+				 */
+				if (nengine > 1)
+					usleep(10*nengine);
+
+				__rearm_spin_batch(spin);
+
+				igt_nsec_elapsed(&ts);
+				__submit_spin_batch(fd, spin, engine);
+				if (!__spin_wait(fd, spin)) {
+					igt_warn("Wait timeout! (%s)\n",
+						 passname[pass]);
+					break;
+				}
+
+				t = igt_nsec_elapsed(&ts) * 1e-9;
+				if (t > max)
+					max = t;
+				if (t < min)
+					min = t;
+
+				igt_mean_add(&mean, t);
+			}
+
+			igt_spin_batch_free(fd, spin);
+
+			igt_info("%8s %10s: mean=%.2fus stddev=%.3fus [%.2fus, %.2fus] (n=%lu)\n",
+				 names[child],
+				 passname[pass],
+				 igt_mean_get(&mean) * 1e6,
+				 sqrt(igt_mean_get_variance(&mean)) * 1e6,
+				 min * 1e6, max * 1e6,
+				 mean.count);
+
+			results[NPASS * child + pass].mean = mean;
+			results[NPASS * child + pass].min = min;
+			results[NPASS * child + pass].max = max;
+		} while (++pass < NPASS);
+	}
+
+	igt_waitchildren();
+
+	for (unsigned int child = 0; child < nengine; child++) {
+		struct rt_pkt normal = results[NPASS * child + 1];
+		igt_stats_t stats;
+		double variance = 0;
+
+		igt_stats_init_with_size(&stats, NPASS);
+
+		for (unsigned int pass = 2; pass < NPASS; pass++) {
+			struct rt_pkt *rt = &results[NPASS * child + pass];
+
+			igt_assert(rt->max);
+
+			igt_stats_push_float(&stats, igt_mean_get(&rt->mean));
+			variance += igt_mean_get_variance(&rt->mean);
+		}
+		variance /= NPASS - 2;
+
+		igt_info("%8s: normal latency=%.2f±%.3fus, rt latency=%.2f±%.3fus\n",
+			 names[child],
+			 igt_mean_get(&normal.mean) * 1e6,
+			 sqrt(igt_mean_get_variance(&normal.mean)) * 1e6,
+			 igt_stats_get_median(&stats) * 1e6,
+			 sqrt(variance) * 1e6);
+
+		igt_assert(igt_stats_get_median(&stats) <
+			   igt_mean_get(&normal.mean) * 2);
+
+		/* The system is noisy; be conservative when declaring fail. */
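+		/*
+		 * "Conservative" here: tolerate up to 10x the variance of
+		 * the normal pass before failing.
+		 */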
+		igt_assert(variance < igt_mean_get_variance(&normal.mean) * 10);
+	}
+
+	munmap(results, MMAP_SZ);
+}
+
+igt_main
+{
+	const struct intel_execution_engine *e;
+	int device = -1;
+
+	igt_fixture {
+		device = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(device);
+		gem_require_mmap_wc(device);
+
+		gem_submission_print_method(device);
+
+		ring_size = gem_measure_ring_inflight(device, ALL_ENGINES, 0);
+		igt_info("Ring size: %d batches\n", ring_size);
+		igt_require(ring_size > 8);
+		ring_size -= 8; /* leave some spare */
+		if (ring_size > 1024)
+			ring_size = 1024;
+
+		intel_register_access_init(intel_get_pci_device(), false, device);
+	}
+
+	igt_subtest("all-rtidle-submit")
+		rthog_latency_on_ring(device, ALL_ENGINES, "all", RTIDLE);
+
+	igt_subtest("all-rthog-submit")
+		rthog_latency_on_ring(device, ALL_ENGINES, "all", 0);
+
+	igt_subtest_group {
+		igt_fixture
+			igt_require(intel_gen(intel_get_drm_devid(device)) >= 7);
+
+		for (e = intel_execution_engines; e->name; e++) {
+			if (e->exec_id == 0)
+				continue;
+
+			igt_subtest_group {
+				igt_fixture {
+					igt_require(gem_ring_has_physical_engine(device, e->exec_id | e->flags));
+				}
+
+				igt_subtest_f("%s-dispatch", e->name)
+					latency_on_ring(device,
+							e->exec_id | e->flags,
+							e->name, 0);
+
+				igt_subtest_f("%s-poll", e->name)
+					poll_ring(device,
+						  e->exec_id | e->flags,
+						  e->name);
+
+				igt_subtest_f("%s-rtidle-submit", e->name)
+					rthog_latency_on_ring(device,
+							      e->exec_id |
+							      e->flags,
+							      e->name,
+							      RTIDLE);
+
+				igt_subtest_f("%s-rthog-submit", e->name)
+					rthog_latency_on_ring(device,
+							      e->exec_id |
+							      e->flags,
+							      e->name,
+							      0);
+
+				igt_subtest_f("%s-dispatch-queued", e->name)
+					latency_on_ring(device,
+							e->exec_id | e->flags,
+							e->name, CORK);
+
+				igt_subtest_f("%s-synchronisation", e->name)
+					latency_from_ring(device,
+							  e->exec_id | e->flags,
+							  e->name, 0);
+
+				igt_subtest_f("%s-synchronisation-queued", e->name)
+					latency_from_ring(device,
+							  e->exec_id | e->flags,
+							  e->name, CORK);
+
+				igt_subtest_group {
+					igt_fixture {
+						gem_require_contexts(device);
+						igt_require(gem_scheduler_has_preemption(device));
+					}
+
+					igt_subtest_f("%s-preemption", e->name)
+						latency_from_ring(device,
+								  e->exec_id | e->flags,
+								  e->name, PREEMPT);
+				}
+			}
+		}
+	}
+
+	igt_fixture {
+		close(device);
+	}
+}