/* * Copyright © 2016 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include <poll.h> #include <signal.h> #include <sys/ioctl.h> #include "i915/gem.h" #include "i915/gem_create.h" #include "igt.h" #include "igt_store.h" #include "igt_syncobj.h" #include "igt_sysfs.h" #include "igt_vgem.h" #include "intel_ctx.h" #include "sw_sync.h" IGT_TEST_DESCRIPTION("Check that execbuf waits for explicit fences"); #ifndef SYNC_IOC_MERGE struct sync_merge_data { char name[32]; int32_t fd2; int32_t fence; uint32_t flags; uint32_t pad; }; #define SYNC_IOC_MAGIC '>' #define SYNC_IOC_MERGE _IOWR(SYNC_IOC_MAGIC, 3, struct sync_merge_data) #endif #define MI_SEMAPHORE_WAIT (0x1c << 23) #define MI_SEMAPHORE_POLL (1 << 15) #define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) #define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) #define MI_SEMAPHORE_SAD_LT_SDD (2 << 12) #define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12) #define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12) #define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12) static bool fence_busy(int fence) { return poll(&(struct pollfd){fence, POLLIN}, 1, 0) == 0; } #define HANG 0x1 #define NONBLOCK 0x2 #define WAIT 0x4 static void test_fence_busy(int fd, const intel_ctx_t *ctx, const struct intel_execution_engine2 *e, unsigned flags) { const unsigned int gen = intel_gen(intel_get_drm_devid(fd)); struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_relocation_entry reloc; struct drm_i915_gem_execbuffer2 execbuf; struct timespec tv; uint32_t *batch; uint64_t ahnd = get_reloc_ahnd(fd, ctx->id); int fence, i, timeout; if ((flags & HANG) == 0) igt_require(gem_class_has_mutable_submission(fd, e->class)); gem_quiescent_gpu(fd); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; execbuf.flags = e->flags | I915_EXEC_FENCE_OUT; execbuf.rsvd1 = ctx->id; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); obj.offset = get_offset(ahnd, obj.handle, 4096, 0); batch = gem_mmap__device_coherent(fd, obj.handle, 0, 4096, PROT_WRITE); gem_set_domain(fd, obj.handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); i = 0; if ((flags & HANG) == 0) batch[i++] = 0x5 << 23; if (!ahnd) { obj.relocs_ptr = to_user_pointer(&reloc); obj.relocation_count = 1; memset(&reloc, 0, sizeof(reloc)); reloc.target_handle = obj.handle; /* recurse */ reloc.presumed_offset = obj.offset; reloc.offset = (i + 1) * sizeof(uint32_t); reloc.delta = 0; reloc.read_domains = I915_GEM_DOMAIN_COMMAND; reloc.write_domain = 0; } else { obj.flags |= EXEC_OBJECT_PINNED; } batch[i] =
MI_BATCH_BUFFER_START; if (gen >= 8) { batch[i] |= 1 << 8 | 1; batch[++i] = obj.offset; batch[++i] = obj.offset >> 32; } else if (gen >= 6) { batch[i] |= 1 << 8; batch[++i] = obj.offset; } else { batch[i] |= 2 << 6; batch[++i] = obj.offset; if (gen < 4) { batch[i] |= 1; reloc.delta = 1; } } i++; execbuf.rsvd2 = -1; gem_execbuf_wr(fd, &execbuf); fence = execbuf.rsvd2 >> 32; igt_assert(fence != -1); igt_assert(gem_bo_busy(fd, obj.handle)); igt_assert(fence_busy(fence)); timeout = 120; if ((flags & HANG) == 0) { *batch = MI_BATCH_BUFFER_END; __sync_synchronize(); timeout = 1; } munmap(batch, 4096); if (flags & WAIT) { struct pollfd pfd = { .fd = fence, .events = POLLIN }; igt_assert(poll(&pfd, 1, timeout*1000) == 1); } else { memset(&tv, 0, sizeof(tv)); while (fence_busy(fence)) igt_assert(igt_seconds_elapsed(&tv) < timeout); } igt_assert(!gem_bo_busy(fd, obj.handle)); igt_assert_eq(sync_fence_status(fence), flags & HANG ? -EIO : SYNC_FENCE_OK); close(fence); gem_close(fd, obj.handle); put_offset(ahnd, obj.handle); put_ahnd(ahnd); gem_quiescent_gpu(fd); } static void test_fence_busy_all(int fd, const intel_ctx_t *ctx, unsigned flags) { const struct intel_execution_engine2 *e; const unsigned int gen = intel_gen(intel_get_drm_devid(fd)); struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_relocation_entry reloc; struct drm_i915_gem_execbuffer2 execbuf; struct timespec tv; uint32_t *batch; uint64_t ahnd = get_reloc_ahnd(fd, ctx->id); int all, i, timeout; gem_quiescent_gpu(fd); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); obj.offset = get_offset(ahnd, obj.handle, 4096, 0); igt_assert(obj.offset != -1); batch = gem_mmap__device_coherent(fd, obj.handle, 0, 4096, PROT_WRITE); gem_set_domain(fd, obj.handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); i = 0; if ((flags & HANG) == 0) batch[i++] = 0x5 << 23; if (!ahnd) { obj.relocs_ptr = to_user_pointer(&reloc); obj.relocation_count = 1; memset(&reloc, 0, sizeof(reloc)); reloc.target_handle = obj.handle; /* recurse */ reloc.presumed_offset = obj.offset; reloc.offset = (i + 1) * sizeof(uint32_t); reloc.delta = 0; reloc.read_domains = I915_GEM_DOMAIN_COMMAND; reloc.write_domain = 0; } else { obj.flags |= EXEC_OBJECT_PINNED; } batch[i] = MI_BATCH_BUFFER_START; if (gen >= 8) { batch[i] |= 1 << 8 | 1; batch[++i] = obj.offset; batch[++i] = obj.offset >> 32; } else if (gen >= 6) { batch[i] |= 1 << 8; batch[++i] = obj.offset; } else { batch[i] |= 2 << 6; batch[++i] = obj.offset; if (gen < 4) { batch[i] |= 1; reloc.delta = 1; } } i++; all = -1; for_each_ctx_engine(fd, ctx, e) { int fence, new; if ((flags & HANG) == 0 && !gem_class_has_mutable_submission(fd, e->class)) continue; execbuf.flags = e->flags | I915_EXEC_FENCE_OUT; execbuf.rsvd1 = ctx->id; execbuf.rsvd2 = -1; gem_execbuf_wr(fd, &execbuf); fence = execbuf.rsvd2 >> 32; igt_assert(fence != -1); if (all < 0) { all = fence; continue; } new = sync_fence_merge(all, fence); igt_assert_lte(0, new); close(all); close(fence); all = new; } igt_assert(gem_bo_busy(fd, obj.handle)); igt_assert(fence_busy(all)); timeout = 120; if ((flags & HANG) == 0) { *batch = MI_BATCH_BUFFER_END; __sync_synchronize(); timeout = 1; } munmap(batch, 4096); if (flags & WAIT) { struct pollfd pfd = { .fd = all, .events = POLLIN }; igt_assert(poll(&pfd, 1, timeout*1000) == 1); } else { memset(&tv, 0, sizeof(tv)); while (fence_busy(all)) igt_assert(igt_seconds_elapsed(&tv) < timeout); } 
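/* Once the batch is terminated (or, in the HANG case, reset by the hang detector), both the object and the merged fence covering every engine must retire; the fence status then reports whether the requests completed normally (SYNC_FENCE_OK) or died in a reset (-EIO). */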
igt_assert(!gem_bo_busy(fd, obj.handle)); igt_assert_eq(sync_fence_status(all), flags & HANG ? -EIO : SYNC_FENCE_OK); close(all); gem_close(fd, obj.handle); put_offset(ahnd, obj.handle); put_ahnd(ahnd); gem_quiescent_gpu(fd); } static unsigned int spin_hang(unsigned int flags) { if (!(flags & HANG)) return 0; return IGT_SPIN_NO_PREEMPTION | IGT_SPIN_INVALID_CS; } static void test_fence_await(int fd, const intel_ctx_t *ctx, const struct intel_execution_engine2 *e, unsigned flags) { const struct intel_execution_engine2 *e2; uint32_t scratch = gem_create(fd, 4096); igt_spin_t *spin; uint32_t *out; uint64_t scratch_offset, ahnd = get_reloc_ahnd(fd, ctx->id); int i; scratch_offset = get_offset(ahnd, scratch, 4096, 0); out = gem_mmap__device_coherent(fd, scratch, 0, 4096, PROT_WRITE); gem_set_domain(fd, scratch, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); spin = igt_spin_new(fd, .ahnd = ahnd, .ctx = ctx, .engine = e->flags, .flags = IGT_SPIN_FENCE_OUT | spin_hang(flags)); igt_assert(spin->out_fence != -1); i = 0; for_each_ctx_engine(fd, ctx, e2) { if (!gem_class_can_store_dword(fd, e2->class)) continue; if (flags & NONBLOCK) { igt_store_word(fd, ahnd, ctx, e2, spin->out_fence, scratch, scratch_offset, i, i); } else { igt_fork(child, 1) { ahnd = get_reloc_ahnd(fd, ctx->id); igt_store_word(fd, ahnd, ctx, e2, spin->out_fence, scratch, scratch_offset, i, i); put_ahnd(ahnd); } } i++; } /* Long, but not too long to anger preemption disable checks */ usleep(50 * 1000); /* 50 ms, typical preempt reset is 150+ms */ /* Check for invalidly completing the task early */ igt_assert(fence_busy(spin->out_fence)); for (int n = 0; n < i; n++) igt_assert_eq_u32(out[n], 0); if ((flags & HANG) == 0) igt_spin_end(spin); igt_waitchildren(); gem_set_domain(fd, scratch, I915_GEM_DOMAIN_GTT, 0); while (i--) igt_assert_eq_u32(out[i], i); munmap(out, 4096); igt_spin_free(fd, spin); gem_close(fd, scratch); put_offset(ahnd, scratch); put_ahnd(ahnd); } static uint32_t timeslicing_batches(int i915, uint32_t *offset) { uint32_t handle = gem_create(i915, 4096); uint32_t cs[256]; *offset += 4000; for (int pair = 0; pair <= 1; pair++) { int x = 1; int i = 0; for (int step = 0; step < 8; step++) { if (pair) { cs[i++] = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_EQ_SDD | (4 - 2); cs[i++] = x++; cs[i++] = *offset; cs[i++] = 0; } cs[i++] = MI_STORE_DWORD_IMM; cs[i++] = *offset; cs[i++] = 0; cs[i++] = x++; if (!pair) { cs[i++] = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_EQ_SDD | (4 - 2); cs[i++] = x++; cs[i++] = *offset; cs[i++] = 0; } } cs[i++] = MI_BATCH_BUFFER_END; igt_assert(i < ARRAY_SIZE(cs)); gem_write(i915, handle, pair * sizeof(cs), cs, sizeof(cs)); } *offset = sizeof(cs); return handle; } static void test_submit_fence(int i915, const intel_ctx_t *ctx, const struct intel_execution_engine2 *e) { const struct intel_execution_engine2 *e2; /* * Create a pair of interlocking batches that ping-pong * between each other, and only advance one step at a time. * We require the kernel to preempt at each semaphore and * switch to the other batch in order to advance.
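* * Each batch alternates MI_STORE_DWORD_IMM with an equality MI_SEMAPHORE_WAIT polling for the value the other batch writes, so the shared dword (result[1000] below) can only count up 1..16 if the scheduler timeslices between the two contexts; neither batch can advance on its own.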
*/ for_each_ctx_engine(i915, ctx, e2) { unsigned int offset = 24 << 20; const intel_ctx_t *tmp_ctx; struct drm_i915_gem_exec_object2 obj = { .offset = offset, .flags = EXEC_OBJECT_PINNED, }; struct drm_i915_gem_execbuffer2 execbuf = { .buffers_ptr = to_user_pointer(&obj), .buffer_count = 1, }; uint32_t *result; int out; obj.handle = timeslicing_batches(i915, &offset); result = gem_mmap__device_coherent(i915, obj.handle, 0, 4096, PROT_READ); execbuf.flags = e->flags | I915_EXEC_FENCE_OUT; execbuf.batch_start_offset = 0; execbuf.rsvd1 = ctx->id; gem_execbuf_wr(i915, &execbuf); tmp_ctx = intel_ctx_create(i915, &ctx->cfg); execbuf.rsvd1 = tmp_ctx->id; execbuf.rsvd2 >>= 32; execbuf.flags = e2->flags; execbuf.flags |= I915_EXEC_FENCE_SUBMIT | I915_EXEC_FENCE_OUT; execbuf.batch_start_offset = offset; gem_execbuf_wr(i915, &execbuf); intel_ctx_destroy(i915, tmp_ctx); gem_sync(i915, obj.handle); gem_close(i915, obj.handle); /* no hangs! */ out = execbuf.rsvd2; igt_assert_eq(sync_fence_status(out), 1); close(out); out = execbuf.rsvd2 >> 32; igt_assert_eq(sync_fence_status(out), 1); close(out); igt_assert_eq(result[1000], 16); munmap(result, 4096); } } static uint32_t submitN_batches(int i915, uint32_t offset, int count) { uint32_t handle = gem_create(i915, (count + 1) * 1024); uint32_t cs[256]; for (int pair = 0; pair < count; pair++) { int x = pair; int i = 0; for (int step = 0; step < 8; step++) { cs[i++] = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_EQ_SDD | (4 - 2); cs[i++] = x; cs[i++] = offset; cs[i++] = 0; cs[i++] = MI_STORE_DWORD_IMM; cs[i++] = offset; cs[i++] = 0; cs[i++] = x + 1; x += count; } cs[i++] = MI_BATCH_BUFFER_END; igt_assert(i < ARRAY_SIZE(cs)); gem_write(i915, handle, (pair + 1) * sizeof(cs), cs, sizeof(cs)); } return handle; } static void test_submitN(int i915, const intel_ctx_t *ctx, const struct intel_execution_engine2 *e, int count) { unsigned int offset = 24 << 20; unsigned int sz = ALIGN((count + 1) * 1024, 4096); struct drm_i915_gem_exec_object2 obj = { .handle = submitN_batches(i915, offset, count), .offset = offset, .flags = EXEC_OBJECT_PINNED, }; struct drm_i915_gem_execbuffer2 execbuf = { .buffers_ptr = to_user_pointer(&obj), .buffer_count = 1, .flags = e->flags | I915_EXEC_FENCE_OUT, .rsvd1 = ctx->id, }; uint32_t *result = gem_mmap__device_coherent(i915, obj.handle, 0, sz, PROT_READ); int fence[count]; igt_require(gem_scheduler_has_semaphores(i915)); igt_require(gem_scheduler_has_preemption(i915)); igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8); for (int i = 0; i < count; i++) { const intel_ctx_t *tmp_ctx = intel_ctx_create(i915, &ctx->cfg); execbuf.rsvd1 = tmp_ctx->id; execbuf.batch_start_offset = (i + 1) * 1024; gem_execbuf_wr(i915, &execbuf); intel_ctx_destroy(i915, tmp_ctx); execbuf.flags |= I915_EXEC_FENCE_SUBMIT; execbuf.rsvd2 >>= 32; fence[i] = execbuf.rsvd2; } gem_sync(i915, obj.handle); gem_close(i915, obj.handle); /* no hangs! 
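* * The count batches advance a shared counter in strict round-robin: each batch waits for the counter to reach its value before writing value + 1, so after 8 steps per batch the dword read back below must equal 8 * count.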
*/ for (int i = 0; i < count; i++) { igt_assert_eq(sync_fence_status(fence[i]), 1); close(fence[i]); } igt_assert_eq(*result, 8 * count); munmap(result, sz); } static void alarm_handler(int sig) { } static int __execbuf(int fd, struct drm_i915_gem_execbuffer2 *execbuf) { int err; err = 0; if (ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf)) err = -errno; errno = 0; return err; } static void test_parallel(int i915, const intel_ctx_t *ctx, const struct intel_execution_engine2 *e) { const struct intel_execution_engine2 *e2; const unsigned int gen = intel_gen(intel_get_drm_devid(i915)); uint32_t scratch = gem_create(i915, 4096); uint32_t *out = gem_mmap__device_coherent(i915, scratch, 0, 4096, PROT_READ); uint32_t handle[I915_EXEC_RING_MASK]; IGT_CORK_FENCE(cork); igt_spin_t *spin; int fence; int x = 0; uint64_t ahnd = get_reloc_ahnd(i915, ctx->id), bb_offset; uint64_t scratch_offset = get_offset(ahnd, scratch, 4096, 0); fence = igt_cork_plug(&cork, i915), spin = igt_spin_new(i915, .ahnd = ahnd, .ctx = ctx, .engine = e->flags, .fence = fence, .flags = (IGT_SPIN_FENCE_OUT | IGT_SPIN_FENCE_IN)); close(fence); /* Queue all secondaries */ for_each_ctx_engine(i915, ctx, e2) { struct drm_i915_gem_relocation_entry reloc = { .target_handle = scratch, .offset = sizeof(uint32_t), .delta = sizeof(uint32_t) * x }; struct drm_i915_gem_exec_object2 obj[] = { { .handle = scratch, }, { .relocs_ptr = to_user_pointer(&reloc), .relocation_count = !ahnd ? 1 : 0, } }; struct drm_i915_gem_execbuffer2 execbuf = { .buffers_ptr = to_user_pointer(obj), .buffer_count = ARRAY_SIZE(obj), .flags = e2->flags | I915_EXEC_FENCE_SUBMIT, .rsvd1 = ctx->id, .rsvd2 = spin->out_fence, }; uint32_t batch[16]; int i; if (e2->flags == e->flags) continue; obj[1].handle = gem_create(i915, 4096); if (ahnd) { bb_offset = get_offset(ahnd, obj[1].handle, 4096, 0); obj[1].offset = bb_offset; obj[1].flags = EXEC_OBJECT_PINNED; obj[0].offset = scratch_offset; obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE; } i = 0; batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); if (gen >= 8) { batch[++i] = scratch_offset + reloc.delta; batch[++i] = scratch_offset >> 32; } else if (gen >= 4) { batch[++i] = 0; batch[++i] = reloc.delta; reloc.offset += sizeof(uint32_t); } else { batch[i]--; batch[++i] = reloc.delta; } batch[++i] = ~x; batch[++i] = MI_BATCH_BUFFER_END; gem_write(i915, obj[1].handle, 0, batch, sizeof(batch)); if (gen < 6) execbuf.flags |= I915_EXEC_SECURE; gem_execbuf(i915, &execbuf); handle[x++] = obj[1].handle; } igt_assert(gem_bo_busy(i915, spin->handle)); gem_close(i915, scratch); put_offset(ahnd, scratch); igt_require(x); /* * No secondary should be executed since master is stalled. If there * was no dependency chain at all, the secondaries would start * immediately. */ for (int i = 0; i < x; i++) { igt_assert_eq_u32(out[i], 0); igt_assert(gem_bo_busy(i915, handle[i])); } igt_cork_unplug(&cork); /* * Wait for all secondaries to complete. If we used a regular fence * then the secondaries would not start until the master was complete. * In this case that can only happen with a GPU reset, and so we run * under the hang detector and double check that the master is still * running afterwards. 
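* * Each secondary stored ~i into its own dword of the scratch, so checking out[i] == ~i both proves execution and identifies which engine's store, if any, went missing.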
*/ for (int i = 0; i < x; i++) { while (gem_bo_busy(i915, handle[i])) sleep(0); igt_assert_eq_u32(out[i], ~i); gem_close(i915, handle[i]); put_offset(ahnd, handle[i]); } munmap(out, 4096); /* Master should still be spinning, but all output should be written */ igt_assert(gem_bo_busy(i915, spin->handle)); igt_spin_free(i915, spin); put_ahnd(ahnd); } static void test_concurrent(int i915, const intel_ctx_t *ctx, const struct intel_execution_engine2 *e) { const unsigned int gen = intel_gen(intel_get_drm_devid(i915)); uint64_t ahnd = get_reloc_ahnd(i915, ctx->id); struct drm_i915_gem_relocation_entry reloc = { .target_handle = gem_create(i915, 4096), .write_domain = I915_GEM_DOMAIN_RENDER, .offset = sizeof(uint32_t), }; struct drm_i915_gem_exec_object2 obj[] = { { .handle = reloc.target_handle, }, { .handle = gem_create(i915, 4096), .relocs_ptr = to_user_pointer(&reloc), .relocation_count = !ahnd ? 1 : 0, } }; struct drm_i915_gem_execbuffer2 execbuf = { .buffers_ptr = to_user_pointer(obj), .buffer_count = ARRAY_SIZE(obj), .flags = e->flags | I915_EXEC_FENCE_SUBMIT, .rsvd1 = ctx->id, }; IGT_CORK_FENCE(cork); uint32_t batch[16]; igt_spin_t *spin; const intel_ctx_t *tmp_ctx; uint32_t result; uint64_t bb_offset, target_offset; int fence; int i; bb_offset = get_offset(ahnd, obj[1].handle, 4096, 0); target_offset = get_offset(ahnd, obj[0].handle, 4096, 0); if (ahnd) { obj[1].offset = bb_offset; obj[1].flags = EXEC_OBJECT_PINNED; obj[0].offset = target_offset; obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE; } /* * A variant of test_parallel() that runs a bonded pair on a single * engine and ensures that the secondary batch cannot start before * the master is ready. */ fence = igt_cork_plug(&cork, i915), spin = igt_spin_new(i915, .ahnd = ahnd, .ctx = ctx, .engine = e->flags, .fence = fence, .flags = (IGT_SPIN_FENCE_OUT | IGT_SPIN_FENCE_IN)); close(fence); i = 0; batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); if (gen >= 8) { batch[++i] = target_offset + reloc.delta; batch[++i] = target_offset >> 32; } else if (gen >= 4) { batch[++i] = 0; batch[++i] = reloc.delta; reloc.offset += sizeof(uint32_t); } else { batch[i]--; batch[++i] = reloc.delta; } batch[++i] = 0xd0df0d; batch[++i] = MI_BATCH_BUFFER_END; gem_write(i915, obj[1].handle, 0, batch, sizeof(batch)); tmp_ctx = intel_ctx_create(i915, &ctx->cfg); execbuf.rsvd1 = tmp_ctx->id; execbuf.rsvd2 = spin->out_fence; if (gen < 6) execbuf.flags |= I915_EXEC_SECURE; gem_execbuf(i915, &execbuf); intel_ctx_destroy(i915, tmp_ctx); gem_close(i915, obj[1].handle); put_offset(ahnd, obj[1].handle); /* * No secondary should be executed since master is stalled. If there * was no dependency chain at all, the secondaries would start * immediately. */ usleep(20000); igt_assert(gem_bo_busy(i915, spin->handle)); igt_assert(gem_bo_busy(i915, obj[0].handle)); igt_cork_unplug(&cork); /* * Wait for all secondaries to complete. If we used a regular fence * then the secondaries would not start until the master was complete. * In this case that can only happen with a GPU reset, and so we run * under the hang detector and double check that the master is still * running afterwards. 
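* * Here there is only the single bonded secondary, so "all secondaries" reduces to checking that its store of 0xd0df0d landed while the spinner is still running.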
*/ gem_read(i915, obj[0].handle, 0, &result, sizeof(result)); igt_assert_eq_u32(result, 0xd0df0d); gem_close(i915, obj[0].handle); put_offset(ahnd, obj[0].handle); /* Master should still be spinning, but all output should be written */ igt_assert(gem_bo_busy(i915, spin->handle)); igt_spin_free(i915, spin); put_ahnd(ahnd); } static void test_submit_chain(int i915, const intel_ctx_t *ctx) { const struct intel_execution_engine2 *e; igt_spin_t *spin, *sn; IGT_LIST_HEAD(list); IGT_CORK_FENCE(cork); int fence; uint64_t ahnd = get_reloc_ahnd(i915, ctx->id); /* Check that we can simultaneously launch spinners on each engine */ fence = igt_cork_plug(&cork, i915); for_each_ctx_engine(i915, ctx, e) { spin = igt_spin_new(i915, .ahnd = ahnd, .ctx = ctx, .engine = e->flags, .fence = fence, .flags = (IGT_SPIN_POLL_RUN | IGT_SPIN_FENCE_OUT | IGT_SPIN_FENCE_SUBMIT)); fence = spin->out_fence; igt_list_move(&spin->link, &list); } /* Nothing shall run until we pop the cork */ igt_list_for_each_entry(spin, &list, link) { igt_assert(gem_bo_busy(i915, spin->handle)); igt_assert(!igt_spin_has_started(spin)); } igt_cork_unplug(&cork); /* Then everything shall run in parallel */ igt_list_for_each_entry_safe(spin, sn, &list, link) { igt_spin_busywait_until_started(spin); igt_spin_end(spin); igt_assert_eq(sync_fence_wait(spin->out_fence, 50), 0); igt_assert_eq(sync_fence_status(spin->out_fence), 1); igt_spin_free(i915, spin); } put_ahnd(ahnd); } static uint32_t batch_create(int fd) { const uint32_t bbe = MI_BATCH_BUFFER_END; uint32_t handle; handle = gem_create(fd, 4096); gem_write(fd, handle, 0, &bbe, sizeof(bbe)); return handle; } static void test_keep_in_fence(int fd, const intel_ctx_t *ctx, const struct intel_execution_engine2 *e) { struct sigaction sa = { .sa_handler = alarm_handler }; struct drm_i915_gem_exec_object2 obj = { .handle = batch_create(fd), }; struct drm_i915_gem_execbuffer2 execbuf = { .buffers_ptr = to_user_pointer(&obj), .buffer_count = 1, .flags = e->flags | I915_EXEC_FENCE_OUT, .rsvd1 = ctx->id, }; unsigned long count, last; struct itimerval itv; igt_spin_t *spin; uint64_t ahnd = get_reloc_ahnd(fd, ctx->id); int fence; spin = igt_spin_new(fd, .ahnd = ahnd, .ctx = ctx, .engine = e->flags); gem_execbuf_wr(fd, &execbuf); fence = upper_32_bits(execbuf.rsvd2); sigaction(SIGALRM, &sa, NULL); itv.it_interval.tv_sec = 0; itv.it_interval.tv_usec = 1000; itv.it_value.tv_sec = 0; itv.it_value.tv_usec = 10000; setitimer(ITIMER_REAL, &itv, NULL); execbuf.flags |= I915_EXEC_FENCE_IN; execbuf.rsvd2 = fence; last = -1; count = 0; do { int err = __execbuf(fd, &execbuf); igt_assert_eq(lower_32_bits(execbuf.rsvd2), fence); if (err == 0) { close(fence); fence = upper_32_bits(execbuf.rsvd2); execbuf.rsvd2 = fence; count++; continue; } igt_assert_eq(err, -EINTR); igt_assert_eq(upper_32_bits(execbuf.rsvd2), 0); if (last == count) break; last = count; } while (1); memset(&itv, 0, sizeof(itv)); setitimer(ITIMER_REAL, &itv, NULL); gem_close(fd, obj.handle); close(fence); igt_spin_free(fd, spin); gem_quiescent_gpu(fd); put_ahnd(ahnd); } #define EXPIRED 0x10000 static void test_long_history(int fd, const intel_ctx_t *ctx, long ring_size, unsigned flags) { const uint32_t sz = 1 << 20; const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 obj[2]; struct drm_i915_gem_execbuffer2 execbuf; const struct intel_execution_engine2 *e; unsigned int engines[I915_EXEC_RING_MASK + 1], nengine, n, s; unsigned long limit; int all_fences; IGT_CORK_HANDLE(c); limit = -1; if (!gem_uses_full_ppgtt(fd)) limit = ring_size 
/ 3; nengine = 0; for_each_ctx_engine(fd, ctx, e) engines[nengine++] = e->flags; igt_require(nengine); gem_quiescent_gpu(fd); memset(obj, 0, sizeof(obj)); obj[1].handle = gem_create(fd, sz); gem_write(fd, obj[1].handle, sz - sizeof(bbe), &bbe, sizeof(bbe)); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj[1]); execbuf.buffer_count = 1; execbuf.flags = I915_EXEC_FENCE_OUT; execbuf.rsvd1 = ctx->id; gem_execbuf_wr(fd, &execbuf); all_fences = execbuf.rsvd2 >> 32; execbuf.buffers_ptr = to_user_pointer(obj); execbuf.buffer_count = 2; obj[0].handle = igt_cork_plug(&c, fd); igt_until_timeout(5) { const intel_ctx_t *tmp_ctx = intel_ctx_create(fd, &ctx->cfg); execbuf.rsvd1 = tmp_ctx->id; for (n = 0; n < nengine; n++) { struct sync_merge_data merge; execbuf.flags = engines[n] | I915_EXEC_FENCE_OUT; if (__gem_execbuf_wr(fd, &execbuf)) continue; memset(&merge, 0, sizeof(merge)); merge.fd2 = execbuf.rsvd2 >> 32; strcpy(merge.name, "igt"); do_ioctl(all_fences, SYNC_IOC_MERGE, &merge); close(all_fences); close(merge.fd2); all_fences = merge.fence; } intel_ctx_destroy(fd, tmp_ctx); if (!--limit) break; } igt_cork_unplug(&c); igt_info("History depth = %d\n", sync_fence_count(all_fences)); if (flags & EXPIRED) gem_sync(fd, obj[1].handle); execbuf.buffers_ptr = to_user_pointer(&obj[1]); execbuf.buffer_count = 1; execbuf.rsvd2 = all_fences; execbuf.rsvd1 = ctx->id; for (s = 0; s < ring_size; s++) { for (n = 0; n < nengine; n++) { execbuf.flags = engines[n] | I915_EXEC_FENCE_IN; if (__gem_execbuf_wr(fd, &execbuf)) continue; } } close(all_fences); gem_sync(fd, obj[1].handle); gem_close(fd, obj[1].handle); gem_close(fd, obj[0].handle); } static bool has_submit_fence(int fd) { struct drm_i915_getparam gp; int value = 0; memset(&gp, 0, sizeof(gp)); gp.param = I915_PARAM_HAS_EXEC_SUBMIT_FENCE; gp.value = &value; ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp, sizeof(gp)); errno = 0; return value; } static bool has_syncobj(int fd) { struct drm_get_cap cap = { .capability = DRM_CAP_SYNCOBJ }; ioctl(fd, DRM_IOCTL_GET_CAP, &cap); return cap.value; } static bool exec_has_fence_array(int fd) { struct drm_i915_getparam gp; int value = 0; memset(&gp, 0, sizeof(gp)); gp.param = I915_PARAM_HAS_EXEC_FENCE_ARRAY; gp.value = &value; ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp, sizeof(gp)); errno = 0; return value; } static void test_invalid_fence_array(int fd) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_exec_fence fence; void *ptr; /* create an otherwise valid execbuf */ memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; gem_execbuf(fd, &execbuf); execbuf.flags |= I915_EXEC_FENCE_ARRAY; gem_execbuf(fd, &execbuf); /* Now add a few invalid fence-array pointers */ if (sizeof(execbuf.num_cliprects) == sizeof(size_t)) { execbuf.num_cliprects = -1; igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL); } execbuf.num_cliprects = 1; execbuf.cliprects_ptr = -1; igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT); memset(&fence, 0, sizeof(fence)); execbuf.cliprects_ptr = to_user_pointer(&fence); igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT); ptr = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); igt_assert(ptr != MAP_FAILED); execbuf.cliprects_ptr = to_user_pointer(ptr); igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT); 
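/* Walk the same page through decreasing protections: while the page is still readable, the fence array is parsed far enough to fault on the zero syncobj handle (-ENOENT); once it is PROT_NONE, the user copy itself fails (-EFAULT). */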
do_or_die(mprotect(ptr, 4096, PROT_READ)); igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT); do_or_die(mprotect(ptr, 4096, PROT_NONE)); igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT); munmap(ptr, 4096); } static int __syncobj_to_sync_file(int fd, uint32_t handle) { struct drm_syncobj_handle arg = { .handle = handle, .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE, }; return __syncobj_handle_to_fd(fd, &arg); } static int syncobj_export(int fd, uint32_t handle) { return syncobj_handle_to_fd(fd, handle, 0); } static uint32_t syncobj_import(int fd, int syncobj) { return syncobj_fd_to_handle(fd, syncobj, 0); } static bool syncobj_busy(int fd, uint32_t handle) { bool result; int sf; sf = syncobj_handle_to_fd(fd, handle, DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE); result = poll(&(struct pollfd){sf, POLLIN}, 1, 0) == 0; close(sf); return result; } static void test_syncobj_unused_fence(int fd) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_fence fence = { .handle = syncobj_create(fd, 0), }; uint64_t ahnd = get_reloc_ahnd(fd, 0); igt_spin_t *spin = igt_spin_new(fd, .ahnd = ahnd); /* sanity check our syncobj_to_sync_file interface */ igt_assert_eq(__syncobj_to_sync_file(fd, 0), -ENOENT); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; execbuf.flags = I915_EXEC_FENCE_ARRAY; execbuf.cliprects_ptr = to_user_pointer(&fence); execbuf.num_cliprects = 1; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); gem_execbuf(fd, &execbuf); /* no flags, the fence isn't created */ igt_assert_eq(__syncobj_to_sync_file(fd, fence.handle), -EINVAL); igt_assert(gem_bo_busy(fd, obj.handle)); gem_close(fd, obj.handle); syncobj_destroy(fd, fence.handle); igt_spin_free(fd, spin); put_ahnd(ahnd); } static void test_syncobj_invalid_wait(int fd) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_fence fence = { .handle = syncobj_create(fd, 0), }; memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; execbuf.flags = I915_EXEC_FENCE_ARRAY; execbuf.cliprects_ptr = to_user_pointer(&fence); execbuf.num_cliprects = 1; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); /* waiting before the fence is set is invalid */ fence.flags = I915_EXEC_FENCE_WAIT; igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL); gem_close(fd, obj.handle); syncobj_destroy(fd, fence.handle); } static void test_syncobj_invalid_flags(int fd) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_fence fence = { .handle = syncobj_create(fd, 0), }; memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; execbuf.flags = I915_EXEC_FENCE_ARRAY; execbuf.cliprects_ptr = to_user_pointer(&fence); execbuf.num_cliprects = 1; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); /* set all flags to hit an invalid one */ fence.flags = ~0; igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL); gem_close(fd, obj.handle); syncobj_destroy(fd, fence.handle); } static void test_syncobj_signal(int fd) { const uint32_t bbe = 
MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_fence fence = { .handle = syncobj_create(fd, 0), }; uint64_t ahnd = get_reloc_ahnd(fd, 0); igt_spin_t *spin = igt_spin_new(fd, .ahnd = ahnd); /* Check that the syncobj is signaled only when our request/fence is */ memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; execbuf.flags = I915_EXEC_FENCE_ARRAY; execbuf.cliprects_ptr = to_user_pointer(&fence); execbuf.num_cliprects = 1; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); fence.flags = I915_EXEC_FENCE_SIGNAL; gem_execbuf(fd, &execbuf); igt_assert(gem_bo_busy(fd, obj.handle)); igt_assert(syncobj_busy(fd, fence.handle)); igt_spin_free(fd, spin); gem_sync(fd, obj.handle); igt_assert(!gem_bo_busy(fd, obj.handle)); igt_assert(!syncobj_busy(fd, fence.handle)); gem_close(fd, obj.handle); syncobj_destroy(fd, fence.handle); put_ahnd(ahnd); } static void test_syncobj_wait(int fd, const intel_ctx_t *ctx) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_fence fence = { .handle = syncobj_create(fd, 0), }; const struct intel_execution_engine2 *e; unsigned handle[I915_EXEC_RING_MASK + 1]; igt_spin_t *spin; int n; uint64_t ahnd = get_reloc_ahnd(fd, 0); /* Check that we can use the syncobj to asynchronously wait prior to * execution. */ gem_quiescent_gpu(fd); spin = igt_spin_new(fd, .ahnd = ahnd); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); /* Queue a signaler from the blocked engine */ execbuf.flags = I915_EXEC_FENCE_ARRAY; execbuf.cliprects_ptr = to_user_pointer(&fence); execbuf.num_cliprects = 1; fence.flags = I915_EXEC_FENCE_SIGNAL; gem_execbuf(fd, &execbuf); igt_assert(gem_bo_busy(fd, spin->handle)); gem_close(fd, obj.handle); n = 0; for_each_ctx_engine(fd, ctx, e) { obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); /* Now wait upon the blocked engine */ execbuf.flags = I915_EXEC_FENCE_ARRAY | e->flags; execbuf.rsvd1 = ctx->id; execbuf.cliprects_ptr = to_user_pointer(&fence); execbuf.num_cliprects = 1; fence.flags = I915_EXEC_FENCE_WAIT; gem_execbuf(fd, &execbuf); igt_assert(gem_bo_busy(fd, obj.handle)); handle[n++] = obj.handle; } igt_assert(gem_bo_busy(fd, spin->handle)); syncobj_destroy(fd, fence.handle); for (int i = 0; i < n; i++) igt_assert(gem_bo_busy(fd, handle[i])); igt_spin_free(fd, spin); for (int i = 0; i < n; i++) { gem_sync(fd, handle[i]); gem_close(fd, handle[i]); } put_ahnd(ahnd); } static void test_syncobj_export(int fd) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_fence fence = { .handle = syncobj_create(fd, 0), }; int export[2]; igt_spin_t *spin; uint64_t ahnd = get_reloc_ahnd(fd, 0); spin = igt_spin_new(fd, .ahnd = ahnd); /* Check that if we export the syncobj prior to use it picks up * the later fence. This allows a syncobj to establish a channel * between clients that may be updated to a later fence by either * end.
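* * Both fds exported below are handles to the same underlying syncobj, so once the execbuf installs a fresh fence, a re-import of either fd must see it busy, and idle again only after the spinner is released.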
*/ for (int n = 0; n < ARRAY_SIZE(export); n++) export[n] = syncobj_export(fd, fence.handle); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; execbuf.flags = I915_EXEC_FENCE_ARRAY; execbuf.cliprects_ptr = to_user_pointer(&fence); execbuf.num_cliprects = 1; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); fence.flags = I915_EXEC_FENCE_SIGNAL; gem_execbuf(fd, &execbuf); igt_assert(syncobj_busy(fd, fence.handle)); igt_assert(gem_bo_busy(fd, obj.handle)); for (int n = 0; n < ARRAY_SIZE(export); n++) { uint32_t import = syncobj_import(fd, export[n]); igt_assert(syncobj_busy(fd, import)); syncobj_destroy(fd, import); } igt_spin_free(fd, spin); gem_sync(fd, obj.handle); igt_assert(!gem_bo_busy(fd, obj.handle)); igt_assert(!syncobj_busy(fd, fence.handle)); gem_close(fd, obj.handle); syncobj_destroy(fd, fence.handle); for (int n = 0; n < ARRAY_SIZE(export); n++) { uint32_t import = syncobj_import(fd, export[n]); igt_assert(!syncobj_busy(fd, import)); syncobj_destroy(fd, import); close(export[n]); } put_ahnd(ahnd); } static void test_syncobj_repeat(int fd) { const uint32_t bbe = MI_BATCH_BUFFER_END; const unsigned nfences = 4096; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_fence *fence; int export; igt_spin_t *spin; uint64_t ahnd = get_reloc_ahnd(fd, 0); spin = igt_spin_new(fd, .ahnd = ahnd); /* Check that we can wait on the same fence multiple times */ fence = calloc(nfences, sizeof(*fence)); fence->handle = syncobj_create(fd, 0); export = syncobj_export(fd, fence->handle); for (int i = 1; i < nfences; i++) fence[i].handle = syncobj_import(fd, export); close(export); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; execbuf.flags = I915_EXEC_FENCE_ARRAY; execbuf.cliprects_ptr = to_user_pointer(fence); execbuf.num_cliprects = nfences; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); for (int i = 0; i < nfences; i++) fence[i].flags = I915_EXEC_FENCE_SIGNAL; gem_execbuf(fd, &execbuf); for (int i = 0; i < nfences; i++) { igt_assert(syncobj_busy(fd, fence[i].handle)); fence[i].flags |= I915_EXEC_FENCE_WAIT; } igt_assert(gem_bo_busy(fd, obj.handle)); gem_execbuf(fd, &execbuf); for (int i = 0; i < nfences; i++) igt_assert(syncobj_busy(fd, fence[i].handle)); igt_assert(gem_bo_busy(fd, obj.handle)); igt_spin_free(fd, spin); gem_sync(fd, obj.handle); gem_close(fd, obj.handle); for (int i = 0; i < nfences; i++) { igt_assert(!syncobj_busy(fd, fence[i].handle)); syncobj_destroy(fd, fence[i].handle); } free(fence); put_ahnd(ahnd); } static void test_syncobj_import(int fd) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf; uint64_t ahnd = get_reloc_ahnd(fd, 0); igt_spin_t *spin = igt_spin_new(fd, .ahnd = ahnd); uint32_t sync = syncobj_create(fd, 0); int fence; /* Check that we can create a syncobj from an explicit fence (which * uses sync_file) and that it acts just like a regular fence. 
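* * The flow is: I915_EXEC_FENCE_OUT hands back a sync_file fd, syncobj_import_sync_file() wraps that fd into the syncobj, and the fd can then be closed without affecting the imported fence.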
*/ memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; execbuf.flags = I915_EXEC_FENCE_OUT; execbuf.rsvd2 = -1; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); gem_execbuf_wr(fd, &execbuf); fence = execbuf.rsvd2 >> 32; igt_assert(fence_busy(fence)); syncobj_import_sync_file(fd, sync, fence); close(fence); igt_assert(gem_bo_busy(fd, obj.handle)); igt_assert(syncobj_busy(fd, sync)); igt_spin_free(fd, spin); gem_sync(fd, obj.handle); igt_assert(!gem_bo_busy(fd, obj.handle)); igt_assert(!syncobj_busy(fd, sync)); gem_close(fd, obj.handle); syncobj_destroy(fd, sync); put_ahnd(ahnd); } static void test_syncobj_channel(int fd) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf; unsigned int *control; int syncobj[3]; /* Create a pair of channels (like a pipe) between two clients * and try to create races on the syncobj. */ control = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); igt_assert(control != MAP_FAILED); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; execbuf.flags = I915_EXEC_FENCE_OUT; execbuf.rsvd2 = -1; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); for (int i = 0; i < ARRAY_SIZE(syncobj); i++) { struct drm_i915_gem_exec_fence fence; execbuf.flags = I915_EXEC_FENCE_ARRAY; execbuf.cliprects_ptr = to_user_pointer(&fence); execbuf.num_cliprects = 1; /* Create a primed fence */ fence.handle = syncobj_create(fd, 0); fence.flags = I915_EXEC_FENCE_SIGNAL; gem_execbuf(fd, &execbuf); syncobj[i] = fence.handle; } /* Two processes in ping-pong unison (pipe), one out of sync */ igt_fork(child, 1) { struct drm_i915_gem_exec_fence fence[3]; unsigned long count; execbuf.flags = I915_EXEC_FENCE_ARRAY; execbuf.cliprects_ptr = to_user_pointer(fence); execbuf.num_cliprects = 3; fence[0].handle = syncobj[0]; fence[0].flags = I915_EXEC_FENCE_SIGNAL; fence[1].handle = syncobj[1]; fence[1].flags = I915_EXEC_FENCE_WAIT; fence[2].handle = syncobj[2]; fence[2].flags = I915_EXEC_FENCE_WAIT; count = 0; while (!*(volatile unsigned *)control) { gem_execbuf(fd, &execbuf); count++; } control[1] = count; } igt_fork(child, 1) { struct drm_i915_gem_exec_fence fence[3]; unsigned long count; execbuf.flags = I915_EXEC_FENCE_ARRAY; execbuf.cliprects_ptr = to_user_pointer(fence); execbuf.num_cliprects = 3; fence[0].handle = syncobj[0]; fence[0].flags = I915_EXEC_FENCE_WAIT; fence[1].handle = syncobj[1]; fence[1].flags = I915_EXEC_FENCE_SIGNAL; fence[2].handle = syncobj[2]; fence[2].flags = I915_EXEC_FENCE_WAIT; count = 0; while (!*(volatile unsigned *)control) { gem_execbuf(fd, &execbuf); count++; } control[2] = count; } igt_fork(child, 1) { struct drm_i915_gem_exec_fence fence; unsigned long count; execbuf.flags = I915_EXEC_FENCE_ARRAY; execbuf.cliprects_ptr = to_user_pointer(&fence); execbuf.num_cliprects = 1; fence.handle = syncobj[2]; fence.flags = I915_EXEC_FENCE_SIGNAL; count = 0; while (!*(volatile unsigned *)control) { gem_execbuf(fd, &execbuf); count++; } control[3] = count; } sleep(1); *control = 1; igt_waitchildren(); igt_info("Pipe=[%u, %u], gooseberry=%u\n", control[1], control[2], control[3]); munmap(control, 4096); gem_sync(fd, obj.handle); gem_close(fd, obj.handle); for (int i = 0; i < ARRAY_SIZE(syncobj); i++) syncobj_destroy(fd, syncobj[i]); } static bool 
has_syncobj_timeline(int fd) { struct drm_get_cap cap = { .capability = DRM_CAP_SYNCOBJ_TIMELINE }; ioctl(fd, DRM_IOCTL_GET_CAP, &cap); return cap.value; } static bool exec_has_timeline_fences(int fd) { struct drm_i915_getparam gp; int value = 0; memset(&gp, 0, sizeof(gp)); gp.param = I915_PARAM_HAS_EXEC_TIMELINE_FENCES; gp.value = &value; ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp, sizeof(gp)); errno = 0; return value; } static const char *test_invalid_timeline_fence_array_desc = "Verifies invalid execbuf parameters in" " drm_i915_gem_execbuffer_ext_timeline_fences are rejected"; static void test_invalid_timeline_fence_array(int fd) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; struct drm_i915_gem_exec_fence fence; uint64_t value; void *ptr; fd = gem_reopen_driver(fd); /* create an otherwise valid execbuf */ memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; gem_execbuf(fd, &execbuf); /* Invalid num_cliprects value */ execbuf.cliprects_ptr = to_user_pointer(&timeline_fences); execbuf.num_cliprects = 1; execbuf.flags = I915_EXEC_USE_EXTENSIONS; igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL); fence.handle = syncobj_create(fd, 0); fence.flags = I915_EXEC_FENCE_SIGNAL; value = 1; memset(&timeline_fences, 0, sizeof(timeline_fences)); timeline_fences.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES; timeline_fences.fence_count = 1; timeline_fences.handles_ptr = to_user_pointer(&fence); timeline_fences.values_ptr = to_user_pointer(&value); /* Invalid fence array & i915 ext */ execbuf.cliprects_ptr = to_user_pointer(&timeline_fences); execbuf.num_cliprects = 0; execbuf.flags = I915_EXEC_FENCE_ARRAY | I915_EXEC_USE_EXTENSIONS; igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL); execbuf.flags = I915_EXEC_USE_EXTENSIONS; /* Invalid handles_ptr */ value = 1; memset(&timeline_fences, 0, sizeof(timeline_fences)); timeline_fences.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES; timeline_fences.fence_count = 1; timeline_fences.handles_ptr = -1; timeline_fences.values_ptr = to_user_pointer(&value); igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT); /* Invalid values_ptr */ value = 1; memset(&timeline_fences, 0, sizeof(timeline_fences)); timeline_fences.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES; timeline_fences.fence_count = 1; timeline_fences.handles_ptr = to_user_pointer(&fence); timeline_fences.values_ptr = -1; igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT); /* Invalid syncobj handle */ memset(&fence, 0, sizeof(fence)); fence.handle = 0; fence.flags = I915_EXEC_FENCE_WAIT; value = 1; memset(&timeline_fences, 0, sizeof(timeline_fences)); timeline_fences.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES; timeline_fences.fence_count = 1; timeline_fences.handles_ptr = to_user_pointer(&fence); timeline_fences.values_ptr = to_user_pointer(&value); igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT); /* Invalid syncobj timeline point */ memset(&fence, 0, sizeof(fence)); fence.handle = syncobj_create(fd, 0); fence.flags = I915_EXEC_FENCE_WAIT; value = 1; memset(&timeline_fences, 0, sizeof(timeline_fences)); timeline_fences.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES; timeline_fences.fence_count = 1; timeline_fences.handles_ptr = 
to_user_pointer(&fence); timeline_fences.values_ptr = to_user_pointer(&value); igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL); syncobj_destroy(fd, fence.handle); /* Invalid handles_ptr */ ptr = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); igt_assert(ptr != MAP_FAILED); memset(&timeline_fences, 0, sizeof(timeline_fences)); timeline_fences.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES; timeline_fences.fence_count = 1; timeline_fences.handles_ptr = to_user_pointer(ptr); timeline_fences.values_ptr = to_user_pointer(&value); igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT); do_or_die(mprotect(ptr, 4096, PROT_READ)); igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT); do_or_die(mprotect(ptr, 4096, PROT_NONE)); igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT); munmap(ptr, 4096); /* Invalid values_ptr */ ptr = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); igt_assert(ptr != MAP_FAILED); memset(&timeline_fences, 0, sizeof(timeline_fences)); timeline_fences.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES; timeline_fences.fence_count = 1; timeline_fences.handles_ptr = to_user_pointer(&fence); timeline_fences.values_ptr = to_user_pointer(ptr); igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT); do_or_die(mprotect(ptr, 4096, PROT_READ)); igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT); do_or_die(mprotect(ptr, 4096, PROT_NONE)); igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT); munmap(ptr, 4096); close(fd); } static const char *test_syncobj_timeline_unused_fence_desc = "Verifies that a timeline syncobj passed into" " drm_i915_gem_execbuffer_ext_timeline_fences but with no signal/wait" " flag is left untouched"; static void test_syncobj_timeline_unused_fence(int fd) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; struct drm_i915_gem_exec_fence fence = { .handle = syncobj_create(fd, 0), }; uint64_t value = 1, ahnd = get_reloc_ahnd(fd, 0); igt_spin_t *spin = igt_spin_new(fd, .ahnd = ahnd); /* sanity check our syncobj_to_sync_file interface */ igt_assert_eq(__syncobj_to_sync_file(fd, 0), -ENOENT); memset(&timeline_fences, 0, sizeof(timeline_fences)); timeline_fences.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES; timeline_fences.fence_count = 1; timeline_fences.handles_ptr = to_user_pointer(&fence); timeline_fences.values_ptr = to_user_pointer(&value); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; execbuf.flags = I915_EXEC_USE_EXTENSIONS; execbuf.cliprects_ptr = to_user_pointer(&timeline_fences); execbuf.num_cliprects = 0; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); gem_execbuf(fd, &execbuf); /* no flags, the fence isn't created */ igt_assert_eq(__syncobj_to_sync_file(fd, fence.handle), -EINVAL); igt_assert(gem_bo_busy(fd, obj.handle)); gem_close(fd, obj.handle); syncobj_destroy(fd, fence.handle); igt_spin_free(fd, spin); put_ahnd(ahnd); } static const char *test_syncobj_timeline_invalid_wait_desc = "Verifies that submitting an execbuf with a wait on a timeline syncobj" " point that does not exist is rejected"; static void test_syncobj_timeline_invalid_wait(int fd) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; struct
drm_i915_gem_exec_fence fence = { .handle = syncobj_create(fd, 0), }; uint64_t value = 1; memset(&timeline_fences, 0, sizeof(timeline_fences)); timeline_fences.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES; timeline_fences.fence_count = 1; timeline_fences.handles_ptr = to_user_pointer(&fence); timeline_fences.values_ptr = to_user_pointer(&value); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; execbuf.flags = I915_EXEC_USE_EXTENSIONS; execbuf.cliprects_ptr = to_user_pointer(&timeline_fences); execbuf.num_cliprects = 0; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); /* waiting before the fence point 1 is set is invalid */ fence.flags = I915_EXEC_FENCE_WAIT; igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL); /* Now set point 1. */ fence.flags = I915_EXEC_FENCE_SIGNAL; gem_execbuf(fd, &execbuf); /* waiting before the fence point 2 is set is invalid */ value = 2; fence.flags = I915_EXEC_FENCE_WAIT; igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL); gem_close(fd, obj.handle); syncobj_destroy(fd, fence.handle); } static const char *test_syncobj_timeline_invalid_flags_desc = "Verifies that invalid fence flags in" " drm_i915_gem_execbuffer_ext_timeline_fences are rejected"; static void test_syncobj_timeline_invalid_flags(int fd) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; struct drm_i915_gem_exec_fence fence = { .handle = syncobj_create(fd, 0), }; uint64_t value = 1; memset(&timeline_fences, 0, sizeof(timeline_fences)); timeline_fences.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES; timeline_fences.fence_count = 1; timeline_fences.handles_ptr = to_user_pointer(&fence); timeline_fences.values_ptr = to_user_pointer(&value); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; execbuf.flags = I915_EXEC_USE_EXTENSIONS; execbuf.cliprects_ptr = to_user_pointer(&timeline_fences); execbuf.num_cliprects = 0; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); /* set all flags to hit an invalid one */ fence.flags = ~0; igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL); gem_close(fd, obj.handle); syncobj_destroy(fd, fence.handle); } static uint64_t gettime_ns(void) { struct timespec current; clock_gettime(CLOCK_MONOTONIC, &current); return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec; } static const char *test_syncobj_timeline_signal_desc = "Verifies proper signaling of a timeline syncobj through execbuf"; static void test_syncobj_timeline_signal(int fd) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; struct drm_i915_gem_exec_fence fence = { .handle = syncobj_create(fd, 0), }; uint64_t value = 42, query_value, ahnd = get_reloc_ahnd(fd, 0); igt_spin_t *spin; /* Check that the syncobj is signaled only when our request/fence is */ memset(&timeline_fences, 0, sizeof(timeline_fences)); timeline_fences.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES; timeline_fences.fence_count = 1; timeline_fences.handles_ptr = to_user_pointer(&fence); timeline_fences.values_ptr = to_user_pointer(&value); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr =
to_user_pointer(&obj); execbuf.buffer_count = 1; execbuf.flags = I915_EXEC_USE_EXTENSIONS; execbuf.cliprects_ptr = to_user_pointer(&timeline_fences); execbuf.num_cliprects = 0; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe)); fence.flags = I915_EXEC_FENCE_SIGNAL; /* Check syncobj after waiting on the buffer handle. */ spin = igt_spin_new(fd, .ahnd = ahnd); gem_execbuf(fd, &execbuf); igt_assert(gem_bo_busy(fd, obj.handle)); igt_assert(syncobj_busy(fd, fence.handle)); igt_assert(syncobj_timeline_wait(fd, &fence.handle, &value, 1, 0, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE, NULL)); igt_assert_eq(syncobj_timeline_wait_err(fd, &fence.handle, &value, 1, 0, 0), -ETIME); igt_spin_free(fd, spin); gem_sync(fd, obj.handle); igt_assert(!syncobj_busy(fd, fence.handle)); igt_assert(!gem_bo_busy(fd, obj.handle)); syncobj_timeline_query(fd, &fence.handle, &query_value, 1); igt_assert_eq(query_value, value); spin = igt_spin_new(fd, .ahnd = ahnd); /* * Wait on the syncobj and verify the state of the buffer * handle. */ value = 84; gem_execbuf(fd, &execbuf); igt_assert(gem_bo_busy(fd, obj.handle)); igt_assert(syncobj_busy(fd, fence.handle)); igt_assert(syncobj_timeline_wait(fd, &fence.handle, &value, 1, 0, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE, NULL)); igt_assert_eq(syncobj_timeline_wait_err(fd, &fence.handle, &value, 1, 0, 0), -ETIME); igt_spin_free(fd, spin); igt_assert(syncobj_timeline_wait(fd, &fence.handle, &value, 1, gettime_ns() + NSEC_PER_SEC, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL)); igt_assert(!gem_bo_busy(fd, obj.handle)); igt_assert(!syncobj_busy(fd, fence.handle)); syncobj_timeline_query(fd, &fence.handle, &query_value, 1); igt_assert_eq(query_value, value); gem_close(fd, obj.handle); syncobj_destroy(fd, fence.handle); put_ahnd(ahnd); } static const char *test_syncobj_timeline_wait_desc = "Verifies that waiting on a timeline syncobj point between engines" " works"; static void test_syncobj_timeline_wait(int fd, const intel_ctx_t *ctx) { const uint32_t bbe[2] = { MI_BATCH_BUFFER_END, MI_NOOP, }; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf = { .buffers_ptr = to_user_pointer(&obj), .buffer_count = 1, }; struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; struct drm_i915_gem_exec_fence fence = { .handle = syncobj_create(fd, 0), }; unsigned handle[I915_EXEC_RING_MASK + 1]; const struct intel_execution_engine2 *e; uint64_t value = 1, ahnd = get_reloc_ahnd(fd, ctx->id); igt_spin_t *spin; int n; /* Check that we can use the syncobj to asynchronously wait prior to * execution.
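* * The spinner occupies every engine, the signaler queued behind it is the only request that can advance the timeline to point 1, so each per-engine waiter on point 1 must stay blocked until the spinner is released.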
*/ gem_quiescent_gpu(fd); spin = igt_spin_new(fd, .ahnd = ahnd, .ctx = ctx, .engine = ALL_ENGINES); memset(&timeline_fences, 0, sizeof(timeline_fences)); timeline_fences.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES; timeline_fences.fence_count = 1; timeline_fences.handles_ptr = to_user_pointer(&fence); timeline_fences.values_ptr = to_user_pointer(&value); memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, bbe, sizeof(bbe)); /* Queue a signaler from the blocked engine */ execbuf.flags = I915_EXEC_USE_EXTENSIONS; execbuf.cliprects_ptr = to_user_pointer(&timeline_fences); execbuf.num_cliprects = 0; fence.flags = I915_EXEC_FENCE_SIGNAL; gem_execbuf(fd, &execbuf); igt_assert(gem_bo_busy(fd, spin->handle)); gem_close(fd, obj.handle); n = 0; for_each_ctx_engine(fd, ctx, e) { obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, bbe, sizeof(bbe)); /* Now wait upon the blocked engine */ execbuf.flags = I915_EXEC_USE_EXTENSIONS | e->flags; execbuf.rsvd1 = ctx->id, execbuf.cliprects_ptr = to_user_pointer(&timeline_fences); execbuf.num_cliprects = 0; fence.flags = I915_EXEC_FENCE_WAIT; gem_execbuf(fd, &execbuf); igt_assert(gem_bo_busy(fd, obj.handle)); handle[n++] = obj.handle; } igt_assert(gem_bo_busy(fd, spin->handle)); syncobj_destroy(fd, fence.handle); for (int i = 0; i < n; i++) igt_assert(gem_bo_busy(fd, handle[i])); igt_spin_free(fd, spin); for (int i = 0; i < n; i++) { gem_sync(fd, handle[i]); gem_close(fd, handle[i]); } put_ahnd(ahnd); } static const char *test_syncobj_timeline_export_desc = "Verify exporting of timeline syncobj signaled by i915"; static void test_syncobj_timeline_export(int fd) { const uint32_t bbe[2] = { MI_BATCH_BUFFER_END, MI_NOOP, }; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; struct drm_i915_gem_exec_fence fence = { .handle = syncobj_create(fd, 0), }; uint64_t value = 1, ahnd = get_reloc_ahnd(fd, 0); int export[2]; igt_spin_t *spin = igt_spin_new(fd, .ahnd = ahnd); /* Check that if we export the syncobj prior to use it picks up * the later fence. This allows a syncobj to establish a channel * between clients that may be updated to a later fence by either * end. 
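* * Same channel semantics as test_syncobj_export, except the fence installed here is a timeline point (value 1) signaled via the timeline-fences extension rather than a binary FENCE_ARRAY entry.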
*/ for (int n = 0; n < ARRAY_SIZE(export); n++) export[n] = syncobj_export(fd, fence.handle); memset(&timeline_fences, 0, sizeof(timeline_fences)); timeline_fences.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES; timeline_fences.fence_count = 1; timeline_fences.handles_ptr = to_user_pointer(&fence); timeline_fences.values_ptr = to_user_pointer(&value); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; execbuf.flags = I915_EXEC_USE_EXTENSIONS; execbuf.cliprects_ptr = to_user_pointer(&timeline_fences); execbuf.num_cliprects = 0; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, bbe, sizeof(bbe)); fence.flags = I915_EXEC_FENCE_SIGNAL; gem_execbuf(fd, &execbuf); igt_assert(syncobj_busy(fd, fence.handle)); igt_assert(gem_bo_busy(fd, obj.handle)); for (int n = 0; n < ARRAY_SIZE(export); n++) { uint32_t import = syncobj_import(fd, export[n]); igt_assert(syncobj_busy(fd, import)); syncobj_destroy(fd, import); } igt_spin_free(fd, spin); gem_sync(fd, obj.handle); igt_assert(!gem_bo_busy(fd, obj.handle)); igt_assert(!syncobj_busy(fd, fence.handle)); gem_close(fd, obj.handle); syncobj_destroy(fd, fence.handle); for (int n = 0; n < ARRAY_SIZE(export); n++) { uint32_t import = syncobj_import(fd, export[n]); igt_assert(!syncobj_busy(fd, import)); syncobj_destroy(fd, import); close(export[n]); } put_ahnd(ahnd); } static const char *test_syncobj_timeline_repeat_desc = "Verifies that waiting & signaling the same timeline syncobj point within" " the same execbuf works"; static void test_syncobj_timeline_repeat(int fd) { const uint32_t bbe[2] = { MI_BATCH_BUFFER_END, MI_NOOP, }; const unsigned nfences = 4096; struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; struct drm_i915_gem_exec_fence *fence; uint64_t *values, ahnd = get_reloc_ahnd(fd, 0); int export; igt_spin_t *spin = igt_spin_new(fd, .ahnd = ahnd); /* Check that we can wait on the same fence multiple times */ fence = calloc(nfences, sizeof(*fence)); values = calloc(nfences, sizeof(*values)); fence->handle = syncobj_create(fd, 0); values[0] = 1; export = syncobj_export(fd, fence->handle); for (int i = 1; i < nfences; i++) { fence[i].handle = syncobj_import(fd, export); values[i] = i + 1; } close(export); memset(&timeline_fences, 0, sizeof(timeline_fences)); timeline_fences.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES; timeline_fences.fence_count = nfences; timeline_fences.handles_ptr = to_user_pointer(fence); timeline_fences.values_ptr = to_user_pointer(values); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; execbuf.flags = I915_EXEC_USE_EXTENSIONS; execbuf.cliprects_ptr = to_user_pointer(&timeline_fences); execbuf.num_cliprects = 0; memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); gem_write(fd, obj.handle, 0, bbe, sizeof(bbe)); for (int i = 0; i < nfences; i++) fence[i].flags = I915_EXEC_FENCE_SIGNAL; gem_execbuf(fd, &execbuf); for (int i = 0; i < nfences; i++) { igt_assert(syncobj_busy(fd, fence[i].handle)); /* * Timeline syncobj cannot resignal the same point * again.
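* Submitting with both SIGNAL and WAIT set on an already-signaled point is therefore rejected with -EINVAL below, whereas flipping the flags to WAIT only succeeds.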
		 */
		fence[i].flags |= I915_EXEC_FENCE_WAIT;
	}
	igt_assert(gem_bo_busy(fd, obj.handle));
	igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);

	for (int i = 0; i < nfences; i++) {
		igt_assert(syncobj_busy(fd, fence[i].handle));
		fence[i].flags = I915_EXEC_FENCE_WAIT;
	}
	igt_assert(gem_bo_busy(fd, obj.handle));
	gem_execbuf(fd, &execbuf);

	for (int i = 0; i < nfences; i++)
		igt_assert(syncobj_busy(fd, fence[i].handle));
	igt_assert(gem_bo_busy(fd, obj.handle));

	igt_spin_free(fd, spin);

	gem_sync(fd, obj.handle);
	gem_close(fd, obj.handle);

	for (int i = 0; i < nfences; i++) {
		igt_assert(!syncobj_busy(fd, fence[i].handle));
		syncobj_destroy(fd, fence[i].handle);
	}
	free(fence);
	free(values);

	put_ahnd(ahnd);
}

static const char *test_syncobj_timeline_multiple_ext_nodes_desc =
	"Verify that passing multiple execbuffer_ext nodes works";
static void test_syncobj_timeline_multiple_ext_nodes(int fd)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_exec_object2 obj;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences[8];
	uint32_t syncobjs[4];
	struct drm_i915_gem_exec_fence fences[8];
	uint64_t values[8];

	igt_assert(ARRAY_SIZE(syncobjs) < ARRAY_SIZE(values));

	for (uint32_t i = 0; i < ARRAY_SIZE(syncobjs); i++)
		syncobjs[i] = syncobj_create(fd, 0);

	/* Build a chain of
	 * drm_i915_gem_execbuffer_ext_timeline_fences, each signaling
	 * a syncobj at a particular point.
	 */
	for (uint32_t i = 0; i < ARRAY_SIZE(timeline_fences); i++) {
		uint32_t idx = ARRAY_SIZE(timeline_fences) - 1 - i;
		struct drm_i915_gem_execbuffer_ext_timeline_fences *iter =
			&timeline_fences[idx];
		struct drm_i915_gem_execbuffer_ext_timeline_fences *next =
			i == 0 ? NULL :
			&timeline_fences[ARRAY_SIZE(timeline_fences) - i];
		uint64_t *value = &values[idx];
		struct drm_i915_gem_exec_fence *fence = &fences[idx];

		fence->flags = I915_EXEC_FENCE_SIGNAL;
		fence->handle = syncobjs[idx % ARRAY_SIZE(syncobjs)];
		*value = 3 * i + 1;

		memset(iter, 0, sizeof(*iter));
		iter->base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES;
		iter->base.next_extension = to_user_pointer(next);
		iter->fence_count = 1;
		iter->handles_ptr = to_user_pointer(fence);
		iter->values_ptr = to_user_pointer(value);
	}

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	execbuf.flags = I915_EXEC_USE_EXTENSIONS;
	execbuf.cliprects_ptr = to_user_pointer(&timeline_fences[0]);
	execbuf.num_cliprects = 0;

	memset(&obj, 0, sizeof(obj));
	obj.handle = gem_create(fd, 4096);
	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));

	gem_execbuf(fd, &execbuf);

	/* Wait on the last set of points signaled on each syncobj.
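	 * Each syncobj is signaled twice along the extension chain;
	 * values[4..7] hold the points attached by the later chain
	 * nodes, one per syncobj.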
*/ igt_assert(syncobj_timeline_wait(fd, syncobjs, &values[ARRAY_SIZE(values) - ARRAY_SIZE(syncobjs)], ARRAY_SIZE(syncobjs), gettime_ns() + NSEC_PER_SEC, 0, NULL)); igt_assert(!gem_bo_busy(fd, obj.handle)); gem_close(fd, obj.handle); for (uint32_t i = 0; i < ARRAY_SIZE(syncobjs); i++) syncobj_destroy(fd, syncobjs[i]); } #define HSW_CS_GPR(n) (0x600 + 8*(n)) #define RING_TIMESTAMP (0x358) #define MI_PREDICATE_RESULT_1 (0x41c) #define MI_SET_PREDICATE_RESULT (0x3b8) #define WAIT_BB_OFFSET (64 << 20) #define COUNTER_OFFSET (65 << 20) struct inter_engine_context { int fd; const intel_ctx_cfg_t *cfg; bool use_relocs; struct { const intel_ctx_t *ctx; } iterations[9]; struct intel_engine_data engines; struct inter_engine_batches { void *increment_bb; uint32_t increment_bb_len; uint32_t increment_bb_handle; uint32_t timeline; void *read0_ptrs[2]; void *read1_ptrs[2]; void *write_ptrs[2]; } *batches; const intel_ctx_t *wait_ctx; uint32_t wait_timeline; struct drm_i915_gem_exec_object2 engine_counter_object; }; static void submit_timeline_execbuf(struct inter_engine_context *context, struct drm_i915_gem_execbuffer2 *execbuf, uint32_t run_engine_idx, uint32_t wait_syncobj, uint64_t wait_value, uint32_t signal_syncobj, uint64_t signal_value) { uint64_t values[2] = { }; struct drm_i915_gem_exec_fence fences[2] = { }; struct drm_i915_gem_execbuffer_ext_timeline_fences fence_list = { .base = { .name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES, }, .handles_ptr = to_user_pointer(fences), .values_ptr = to_user_pointer(values), }; if (wait_syncobj) { fences[fence_list.fence_count] = (struct drm_i915_gem_exec_fence) { .handle = wait_syncobj, .flags = I915_EXEC_FENCE_WAIT, }; values[fence_list.fence_count] = wait_value; fence_list.fence_count++; } if (signal_syncobj) { fences[fence_list.fence_count] = (struct drm_i915_gem_exec_fence) { .handle = signal_syncobj, .flags = I915_EXEC_FENCE_SIGNAL, }; values[fence_list.fence_count] = signal_value; fence_list.fence_count++; } if (wait_syncobj || signal_syncobj) { execbuf->flags |= I915_EXEC_USE_EXTENSIONS; execbuf->cliprects_ptr = to_user_pointer(&fence_list); } execbuf->flags |= context->engines.engines[run_engine_idx].flags; gem_execbuf(context->fd, execbuf); } static unsigned int offset_in_page(void *addr) { return (uintptr_t)addr & 4095; } static uint64_t get_cs_timestamp_frequency(int fd) { int cs_ts_freq = 0; drm_i915_getparam_t gp; gp.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY; gp.value = &cs_ts_freq; if (igt_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0) return cs_ts_freq; igt_skip("Kernel with PARAM_CS_TIMESTAMP_FREQUENCY support required\n"); } static bool use_set_predicate_result(int i915) { uint16_t devid = intel_get_drm_devid(i915); return intel_graphics_ver(devid) >= IP_VER(12, 50); } static struct drm_i915_gem_exec_object2 build_wait_bb(int i915, const struct intel_execution_engine2 *engine, uint64_t delay, bool use_relocs, struct drm_i915_gem_relocation_entry *relocs) { const uint64_t timestamp_frequency = get_cs_timestamp_frequency(i915); uint64_t wait_value = 0xffffffffffffffff - (delay * timestamp_frequency) / NSEC_PER_SEC; struct drm_i915_gem_exec_object2 obj = {}; uint32_t mmio_base = gem_engine_mmio_base(i915, engine->name); uint32_t *map, *bb; igt_debug("%s wait_value=0x%"PRIx64", %x\n", engine->name, wait_value, mmio_base); igt_assert(mmio_base); obj.handle = gem_create(i915, 4096); obj.relocs_ptr = to_user_pointer(memset(relocs, 0, sizeof(*relocs))); obj.relocation_count = use_relocs ? 
1 : 0; obj.offset = WAIT_BB_OFFSET; relocs->target_handle = obj.handle; relocs->presumed_offset = obj.offset; map = gem_mmap__device_coherent(i915, obj.handle, 0, 4096, PROT_WRITE); bb = map; *bb++ = MI_LOAD_REGISTER_IMM; *bb++ = mmio_base + HSW_CS_GPR(0); *bb++ = wait_value & 0xffffffff; *bb++ = MI_LOAD_REGISTER_IMM; *bb++ = mmio_base + HSW_CS_GPR(0) + 4; *bb++ = wait_value >> 32; *bb++ = MI_LOAD_REGISTER_REG; *bb++ = mmio_base + RING_TIMESTAMP; *bb++ = mmio_base + HSW_CS_GPR(1); *bb++ = MI_LOAD_REGISTER_IMM; *bb++ = mmio_base + HSW_CS_GPR(1) + 4; *bb++ = 0; *bb++ = MI_LOAD_REGISTER_IMM; *bb++ = mmio_base + HSW_CS_GPR(2) + 4; *bb++ = 0; relocs->delta = offset_in_page(bb); *bb++ = MI_LOAD_REGISTER_REG; *bb++ = mmio_base + RING_TIMESTAMP; *bb++ = mmio_base + HSW_CS_GPR(2); *bb++ = MI_MATH(4); *bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(2)); *bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(1)); *bb++ = MI_MATH_SUB; *bb++ = MI_MATH_STORE(MI_MATH_REG(3), MI_MATH_REG_ACCU); if (use_set_predicate_result(i915)) { *bb++ = MI_MATH(4); *bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(0)); *bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(3)); *bb++ = MI_MATH_ADD; *bb++ = MI_MATH_STORE(MI_MATH_REG(4), MI_MATH_REG_CF); *bb++ = MI_LOAD_REGISTER_REG; *bb++ = mmio_base + HSW_CS_GPR(4); *bb++ = mmio_base + MI_SET_PREDICATE_RESULT; } else { *bb++ = MI_MATH(4); *bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(0)); *bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(3)); *bb++ = MI_MATH_ADD; *bb++ = MI_MATH_STOREINV(MI_MATH_REG(4), MI_MATH_REG_CF); *bb++ = MI_LOAD_REGISTER_REG; *bb++ = mmio_base + HSW_CS_GPR(4); *bb++ = mmio_base + MI_PREDICATE_RESULT_1; } *bb++ = MI_BATCH_BUFFER_START | MI_BATCH_PREDICATE | 1; relocs->offset = offset_in_page(bb); *bb++ = obj.offset + relocs->delta; *bb++ = obj.offset >> 32; if (use_set_predicate_result(i915)) *bb++ = 1 << 23; // MI_SET_PREDICATE *bb++ = MI_BATCH_BUFFER_END; munmap(map, 4096); return obj; } static void wait_engine(int i915, struct inter_engine_context *context, uint32_t run_engine_idx, uint32_t signal_syncobj, uint64_t signal_value) { struct drm_i915_gem_relocation_entry reloc; struct drm_i915_gem_exec_object2 objects[2] = { context->engine_counter_object, build_wait_bb(i915, &context->engines.engines[run_engine_idx], 20 * 1000 * 1000ull /* 20ms */, context->use_relocs, &reloc), }; struct drm_i915_gem_execbuffer2 execbuf = { .buffers_ptr = to_user_pointer(&objects[0]), .buffer_count = 2, .rsvd1 = context->wait_ctx->id, .flags = I915_EXEC_NO_RELOC, .batch_len = 4096, }; submit_timeline_execbuf(context, &execbuf, run_engine_idx, 0, 0, signal_syncobj, signal_value); gem_close(i915, objects[1].handle); } static void build_increment_engine_bb(struct inter_engine_batches *batch, uint32_t mmio_base) { uint32_t *bb = batch->increment_bb = calloc(1, 4096); *bb++ = MI_LOAD_REGISTER_MEM | 2; *bb++ = mmio_base + HSW_CS_GPR(0); batch->read0_ptrs[0] = bb; *bb++ = 0; *bb++ = 0; *bb++ = MI_LOAD_REGISTER_MEM | 2; *bb++ = mmio_base + HSW_CS_GPR(0) + 4; batch->read0_ptrs[1] = bb; *bb++ = 0; *bb++ = 0; *bb++ = MI_LOAD_REGISTER_MEM | 2; *bb++ = mmio_base + HSW_CS_GPR(1); batch->read1_ptrs[0] = bb; *bb++ = 0; *bb++ = 0; *bb++ = MI_LOAD_REGISTER_MEM | 2; *bb++ = mmio_base + HSW_CS_GPR(1) + 4; batch->read1_ptrs[1] = bb; *bb++ = 0; *bb++ = 0; *bb++ = MI_MATH(4); *bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(0)); *bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(1)); *bb++ = MI_MATH_ADD; *bb++ = MI_MATH_STORE(MI_MATH_REG(0), MI_MATH_REG_ACCU); *bb++ = 
MI_STORE_REGISTER_MEM | 2;
	*bb++ = mmio_base + HSW_CS_GPR(0);
	batch->write_ptrs[0] = bb;
	*bb++ = 0;
	*bb++ = 0;

	*bb++ = MI_STORE_REGISTER_MEM | 2;
	*bb++ = mmio_base + HSW_CS_GPR(0) + 4;
	batch->write_ptrs[1] = bb;
	*bb++ = 0;
	*bb++ = 0;

	*bb++ = MI_BATCH_BUFFER_END;

	batch->increment_bb_len = ALIGN((void *) bb - batch->increment_bb, 8);
}

static void increment_engine(struct inter_engine_context *context,
			     const intel_ctx_t *ctx,
			     int iteration,
			     uint32_t read0_engine_idx,
			     uint32_t read1_engine_idx,
			     uint32_t write_engine_idx,
			     uint32_t wait_syncobj,
			     uint64_t wait_value,
			     uint32_t signal_syncobj,
			     uint64_t signal_value)
{
	struct inter_engine_batches *batch = &context->batches[write_engine_idx];
	struct drm_i915_gem_relocation_entry relocs[3 * 2];
	struct drm_i915_gem_exec_object2 objects[2] = {
		context->engine_counter_object,
		{
			.handle = batch->increment_bb_handle,
			.relocs_ptr = to_user_pointer(relocs),
			.relocation_count = context->use_relocs ?
				ARRAY_SIZE(relocs) : 0,
		},
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = to_user_pointer(&objects[0]),
		.buffer_count = ARRAY_SIZE(objects),
		.flags = I915_EXEC_HANDLE_LUT,
		.rsvd1 = ctx->id,
		.batch_len = batch->increment_bb_len,
	};

	memset(relocs, 0, sizeof(relocs));

	/* MI_LOAD_REGISTER_MEM */
	relocs[0].target_handle = 0;
	relocs[0].delta = read0_engine_idx * 8;
	relocs[0].offset = batch->read0_ptrs[0] - batch->increment_bb;
	relocs[0].presumed_offset = -1;
	relocs[1].target_handle = 0;
	relocs[1].delta = read0_engine_idx * 8 + 4;
	relocs[1].offset = batch->read0_ptrs[1] - batch->increment_bb;
	relocs[1].presumed_offset = -1;

	/* MI_LOAD_REGISTER_MEM */
	relocs[2].target_handle = 0;
	relocs[2].delta = read1_engine_idx * 8;
	relocs[2].offset = batch->read1_ptrs[0] - batch->increment_bb;
	relocs[2].presumed_offset = -1;
	relocs[3].target_handle = 0;
	relocs[3].delta = read1_engine_idx * 8 + 4;
	relocs[3].offset = batch->read1_ptrs[1] - batch->increment_bb;
	relocs[3].presumed_offset = -1;

	/* MI_STORE_REGISTER_MEM */
	relocs[4].target_handle = 0;
	relocs[4].delta = write_engine_idx * 8;
	relocs[4].offset = batch->write_ptrs[0] - batch->increment_bb;
	relocs[4].presumed_offset = -1;
	relocs[5].target_handle = 0;
	relocs[5].delta = write_engine_idx * 8 + 4;
	relocs[5].offset = batch->write_ptrs[1] - batch->increment_bb;
	relocs[5].presumed_offset = -1;

	/*
	 * For the no-relocs case, prepare the batch for the dedicated
	 * write engine once, since the iteration doesn't matter for it;
	 * that way we get full pipelining from the second iteration
	 * onwards. For relocs we keep the previous behaviour, where the
	 * kernel has to rewrite the offsets within the bb on every
	 * round.
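	 *
	 * Concretely, the one-off patching below just writes the
	 * absolute address of the counter object into the LRM/SRM
	 * payload dwords, mirroring what the relocation path would do:
	 *
	 *   bb[off / 4]     = counter_offset + delta;
	 *   bb[off / 4 + 1] = (counter_offset + delta) >> 32;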
*/ if (!iteration && !context->use_relocs) { uint64_t counter_offset; uint32_t *bb; counter_offset = context->engine_counter_object.offset; bb = (uint32_t *) batch->increment_bb; for (int i = 0; i < ARRAY_SIZE(relocs); i++) { bb[relocs[i].offset / sizeof(uint32_t)] = counter_offset + relocs[i].delta; bb[relocs[i].offset / sizeof(uint32_t) + 1] = (counter_offset + relocs[i].delta) >> 32; } gem_write(context->fd, batch->increment_bb_handle, 0, batch->increment_bb, batch->increment_bb_len); } submit_timeline_execbuf(context, &execbuf, write_engine_idx, wait_syncobj, wait_value, signal_syncobj, signal_value); context->engine_counter_object = objects[0]; } static uint64_t fib(uint32_t iters) { uint64_t last_value = 0; uint64_t value = 1; uint32_t i = 0; while (i < iters) { uint64_t new_value = value + last_value; last_value = value; value = new_value; i++; } return last_value; } static void setup_timeline_chain_engines(struct inter_engine_context *context, int fd, const intel_ctx_cfg_t *cfg) { memset(context, 0, sizeof(*context)); context->fd = fd; context->cfg = cfg; context->engines = intel_engine_list_for_ctx_cfg(fd, cfg); igt_require(context->engines.nengines > 1); context->use_relocs = gem_has_relocations(fd); context->wait_ctx = intel_ctx_create(fd, cfg); context->wait_timeline = syncobj_create(fd, 0); context->engine_counter_object.handle = gem_create(fd, 4096); context->engine_counter_object.offset = COUNTER_OFFSET; if (context->use_relocs) context->engine_counter_object.flags |= EXEC_OBJECT_PINNED; for (uint32_t i = 0; i < ARRAY_SIZE(context->iterations); i++) { context->iterations[i].ctx = intel_ctx_create(fd, context->cfg); /* Give a different priority to all contexts. */ gem_context_set_priority(fd, context->iterations[i].ctx->id, I915_CONTEXT_MAX_USER_PRIORITY - ARRAY_SIZE(context->iterations) + i); } context->batches = calloc(context->engines.nengines, sizeof(*context->batches)); for (uint32_t e = 0; e < context->engines.nengines; e++) { struct inter_engine_batches *batches = &context->batches[e]; batches->timeline = syncobj_create(fd, 0); build_increment_engine_bb( batches, gem_engine_mmio_base(fd, context->engines.engines[e].name)); batches->increment_bb_handle = gem_create(fd, 4096); gem_write(fd, batches->increment_bb_handle, 0, batches->increment_bb, batches->increment_bb_len); } for (uint32_t i = 0; i < 10; i++) igt_debug("%u = %"PRIu64"\n", i, fib(i)); /* Bootstrap the fibonacci sequence */ { uint64_t dword = 1; gem_write(fd, context->engine_counter_object.handle, sizeof(dword) * (context->engines.nengines - 1), &dword, sizeof(dword)); } } static void teardown_timeline_chain_engines(struct inter_engine_context *context) { gem_close(context->fd, context->engine_counter_object.handle); for (uint32_t i = 0; i < ARRAY_SIZE(context->iterations); i++) { intel_ctx_destroy(context->fd, context->iterations[i].ctx); } intel_ctx_destroy(context->fd, context->wait_ctx); syncobj_destroy(context->fd, context->wait_timeline); for (uint32_t e = 0; e < context->engines.nengines; e++) { struct inter_engine_batches *batches = &context->batches[e]; syncobj_destroy(context->fd, batches->timeline); gem_close(context->fd, batches->increment_bb_handle); free(batches->increment_bb); } free(context->batches); } static void test_syncobj_timeline_chain_engines(int fd, const intel_ctx_cfg_t *cfg) { struct inter_engine_context ctx; uint64_t *counter_output; setup_timeline_chain_engines(&ctx, fd, cfg); /* * Delay all the other operations by making them depend on an * active wait on the RCS. 
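	 *
	 * The wait batch built by build_wait_bb() busy-spins on the
	 * engine, using MI_MATH to compare RING_TIMESTAMP deltas
	 * against a ~20ms deadline and a predicated
	 * MI_BATCH_BUFFER_START to loop, so every engine's first
	 * increment queues up behind it.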
	 */
	wait_engine(fd, &ctx, 0, ctx.wait_timeline, 1);

	for (uint32_t iter = 0; iter < ARRAY_SIZE(ctx.iterations); iter++) {
		for (uint32_t engine = 0; engine < ctx.engines.nengines; engine++) {
			uint32_t prev_prev_engine =
				(ctx.engines.nengines + engine - 2) % ctx.engines.nengines;
			uint32_t prev_engine =
				(ctx.engines.nengines + engine - 1) % ctx.engines.nengines;
			/*
			 * Pick up the wait engine semaphore for the
			 * first increment, then pick up the previous
			 * engine's timeline.
			 */
			uint32_t wait_syncobj =
				iter == 0 && engine == 0 ?
				ctx.wait_timeline :
				ctx.batches[prev_engine].timeline;
			uint32_t wait_value =
				iter == 0 && engine == 0 ?
				1 : (engine == 0 ? iter : (iter + 1));

			increment_engine(&ctx, ctx.iterations[iter].ctx, iter,
					 prev_prev_engine /* read0 engine */,
					 prev_engine /* read1 engine */,
					 engine /* write engine */,
					 wait_syncobj, wait_value,
					 ctx.batches[engine].timeline, iter + 1);
		}
	}

	gem_sync(fd, ctx.engine_counter_object.handle);
	counter_output = gem_mmap__device_coherent(fd, ctx.engine_counter_object.handle,
						   0, 4096, PROT_READ);

	for (uint32_t i = 0; i < ctx.engines.nengines; i++)
		igt_debug("engine %i (%s)\t= %016"PRIx64"\n", i,
			  ctx.engines.engines[i].name, counter_output[i]);

	/*
	 * Verify that we get the expected Fibonacci number (the
	 * sequence is bootstrapped at its second element, 1).
	 */
	igt_assert_eq(counter_output[ctx.engines.nengines - 1],
		      fib(ARRAY_SIZE(ctx.iterations) * ctx.engines.nengines + 1));

	munmap(counter_output, 4096);

	teardown_timeline_chain_engines(&ctx);
}

static void test_syncobj_stationary_timeline_chain_engines(int fd, const intel_ctx_cfg_t *cfg)
{
	struct inter_engine_context ctx;
	uint64_t *counter_output;

	setup_timeline_chain_engines(&ctx, fd, cfg);

	/*
	 * Delay all the other operations by making them depend on an
	 * active wait on the RCS.
	 */
	wait_engine(fd, &ctx, 0, ctx.wait_timeline, 1);

	for (uint32_t iter = 0; iter < ARRAY_SIZE(ctx.iterations); iter++) {
		for (uint32_t engine = 0; engine < ctx.engines.nengines; engine++) {
			uint32_t prev_prev_engine =
				(ctx.engines.nengines + engine - 2) % ctx.engines.nengines;
			uint32_t prev_engine =
				(ctx.engines.nengines + engine - 1) % ctx.engines.nengines;
			/*
			 * Pick up the wait engine semaphore for the
			 * first increment, then pick up the previous
			 * engine's timeline.
			 */
			uint32_t wait_syncobj =
				iter == 0 && engine == 0 ?
				ctx.wait_timeline :
				ctx.batches[prev_engine].timeline;
			/*
			 * Always signal the value 10. Because the
			 * signal operations are submitted in order,
			 * we should always pick up the right
			 * dma-fence.
			 */
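			/*
			 * "Stationary" here means every engine's
			 * timeline stays parked on the same point (10)
			 * for the entire run, so each wait re-resolves
			 * an already-used point instead of chasing an
			 * advancing timeline.
			 */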
			uint32_t wait_value =
				iter == 0 && engine == 0 ? 1 : 10;

			increment_engine(&ctx, ctx.iterations[iter].ctx, iter,
					 prev_prev_engine /* read0 engine */,
					 prev_engine /* read1 engine */,
					 engine /* write engine */,
					 wait_syncobj, wait_value,
					 ctx.batches[engine].timeline, 10);
		}
	}

	gem_sync(fd, ctx.engine_counter_object.handle);
	counter_output = gem_mmap__device_coherent(fd, ctx.engine_counter_object.handle,
						   0, 4096, PROT_READ);

	for (uint32_t i = 0; i < ctx.engines.nengines; i++)
		igt_debug("engine %i (%s)\t= %016"PRIx64"\n", i,
			  ctx.engines.engines[i].name, counter_output[i]);

	igt_assert_eq(counter_output[ctx.engines.nengines - 1],
		      fib(ARRAY_SIZE(ctx.iterations) * ctx.engines.nengines + 1));

	munmap(counter_output, 4096);

	teardown_timeline_chain_engines(&ctx);
}

static void test_syncobj_backward_timeline_chain_engines(int fd, const intel_ctx_cfg_t *cfg)
{
	struct inter_engine_context ctx;
	uint64_t *counter_output;

	setup_timeline_chain_engines(&ctx, fd, cfg);

	/*
	 * Delay all the other operations by making them depend on an
	 * active wait on the RCS.
	 */
	wait_engine(fd, &ctx, 0, ctx.wait_timeline, 1);

	for (uint32_t iter = 0; iter < ARRAY_SIZE(ctx.iterations); iter++) {
		for (uint32_t engine = 0; engine < ctx.engines.nengines; engine++) {
			uint32_t prev_prev_engine =
				(ctx.engines.nengines + engine - 2) % ctx.engines.nengines;
			uint32_t prev_engine =
				(ctx.engines.nengines + engine - 1) % ctx.engines.nengines;
			/*
			 * Pick up the wait engine semaphore for the
			 * first increment, then pick up the previous
			 * engine's timeline.
			 */
			uint32_t wait_syncobj =
				iter == 0 && engine == 0 ?
				ctx.wait_timeline :
				ctx.batches[prev_engine].timeline;
			/*
			 * Here the signal values run backward, from
			 * ARRAY_SIZE(ctx.iterations) down to 1, while
			 * every wait is on point 1. Because the signal
			 * operations are submitted in order, we should
			 * still pick up the right dma-fence.
			 */
			uint32_t wait_value = 1;

			increment_engine(&ctx, ctx.iterations[iter].ctx, iter,
					 prev_prev_engine /* read0 engine */,
					 prev_engine /* read1 engine */,
					 engine /* write engine */,
					 wait_syncobj, wait_value,
					 ctx.batches[engine].timeline,
					 ARRAY_SIZE(ctx.iterations) - iter);
		}
	}

	gem_sync(fd, ctx.engine_counter_object.handle);
	counter_output = gem_mmap__device_coherent(fd, ctx.engine_counter_object.handle,
						   0, 4096, PROT_READ);

	for (uint32_t i = 0; i < ctx.engines.nengines; i++)
		igt_debug("engine %i (%s)\t= %016"PRIx64"\n", i,
			  ctx.engines.engines[i].name, counter_output[i]);

	igt_assert_eq(counter_output[ctx.engines.nengines - 1],
		      fib(ARRAY_SIZE(ctx.iterations) * ctx.engines.nengines + 1));

	munmap(counter_output, 4096);

	teardown_timeline_chain_engines(&ctx);
}

igt_main
{
	const struct intel_execution_engine2 *e;
	const intel_ctx_t *ctx;
	int i915 = -1;

	igt_fixture {
		i915 = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(i915);
		igt_require(gem_has_exec_fence(i915));
		gem_require_mmap_device_coherent(i915);
		ctx = intel_ctx_create_all_physical(i915);
		gem_submission_print_method(i915);
	}

	igt_subtest_group {
		igt_hang_t hang;

		igt_fixture {
			igt_fork_hang_detector(i915);
		}

		igt_describe("Basic check for composite fence on all busy engines.");
		igt_subtest("basic-busy-all")
			test_fence_busy_all(i915, ctx, 0);

		igt_describe("Basic check for composite fence with additional wait on all busy"
			     " engines.");
		igt_subtest("basic-wait-all")
			test_fence_busy_all(i915, ctx, WAIT);

		igt_fixture {
			igt_stop_hang_detector();
			hang = igt_allow_hang(i915, ctx->id, 0);
		}

		igt_describe("Check for composite fence on all busy engines with a pending gpu"
			     " hang.");
		igt_subtest("busy-hang-all")
			test_fence_busy_all(i915, ctx, HANG);

		igt_describe("Check for composite fence with additional wait on all busy engines"
			     " and with a pending gpu hang.");
a pending gpu hang."); igt_subtest("wait-hang-all") test_fence_busy_all(i915, ctx, WAIT | HANG); igt_fixture { igt_disallow_hang(i915, hang); } } igt_subtest_group { for_each_ctx_engine(i915, ctx, e) { igt_fixture { igt_require(gem_class_can_store_dword(i915, e->class)); } } igt_subtest_group { igt_fixture { igt_fork_hang_detector(i915); intel_allocator_multiprocess_start(); } igt_describe("Basic check for explicit fence on each busy engine."); igt_subtest_with_dynamic("basic-busy") { for_each_ctx_engine(i915, ctx, e) { igt_dynamic_f("%s", e->name) test_fence_busy(i915, ctx, e, 0); } } igt_describe("Basic check for explicit fence with additinal wait time on" " each busy engine."); igt_subtest_with_dynamic("basic-wait") { for_each_ctx_engine(i915, ctx, e) { igt_dynamic_f("%s", e->name) test_fence_busy(i915, ctx, e, WAIT); } } igt_describe("Basic check for explicit fence with async wait on each" " engine."); igt_subtest_with_dynamic("basic-await") { for_each_ctx_engine(i915, ctx, e) { igt_dynamic_f("%s", e->name) test_fence_await(i915, ctx, e, 0); } } igt_describe("Check for explicit fence with non-blocking wait on each" " engine."); igt_subtest_with_dynamic("nb-await") { for_each_ctx_engine(i915, ctx, e) { igt_dynamic_f("%s", e->name) test_fence_await(i915, ctx, e, NONBLOCK); } } igt_subtest_with_dynamic("keep-in-fence") { for_each_ctx_engine(i915, ctx, e) { igt_dynamic_f("%s", e->name) test_keep_in_fence(i915, ctx, e); } } igt_subtest_with_dynamic("parallel") { igt_require(has_submit_fence(i915)); for_each_ctx_engine(i915, ctx, e) { igt_dynamic_f("%s", e->name) { igt_until_timeout(2) test_parallel(i915, ctx, e); } } } igt_subtest_with_dynamic("concurrent") { igt_require(has_submit_fence(i915)); igt_require(gem_scheduler_has_semaphores(i915)); igt_require(gem_scheduler_has_preemption(i915)); for_each_ctx_engine(i915, ctx, e) { igt_dynamic_f("%s", e->name) test_concurrent(i915, ctx, e); } } igt_subtest_with_dynamic("submit") { igt_require(gem_scheduler_has_semaphores(i915)); igt_require(gem_scheduler_has_preemption(i915)); igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8); for_each_ctx_engine(i915, ctx, e) { igt_dynamic_f("%s", e->name) test_submit_fence(i915, ctx, e); } } igt_subtest_with_dynamic("submit3") { igt_require(gem_scheduler_has_semaphores(i915)); igt_require(gem_scheduler_has_preemption(i915)); igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8); for_each_ctx_engine(i915, ctx, e) { igt_dynamic_f("%s", e->name) test_submitN(i915, ctx, e, 3); } } igt_subtest_with_dynamic("submit67") { igt_require(gem_scheduler_has_semaphores(i915)); igt_require(gem_scheduler_has_preemption(i915)); igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8); for_each_ctx_engine(i915, ctx, e) { igt_dynamic_f("%s", e->name) test_submitN(i915, ctx, e, 67); } } igt_subtest("submit-chain") { igt_require(has_submit_fence(i915)); test_submit_chain(i915, ctx); } igt_fixture { intel_allocator_multiprocess_stop(); igt_stop_hang_detector(); } } igt_subtest_group { igt_hang_t hang; igt_fixture { hang = igt_allow_hang(i915, ctx->id, 0); intel_allocator_multiprocess_start(); } igt_subtest_with_dynamic("busy-hang") { for_each_ctx_engine(i915, ctx, e) { igt_dynamic_f("%s", e->name) test_fence_busy(i915, ctx, e, HANG); } } igt_subtest_with_dynamic("wait-hang") { for_each_ctx_engine(i915, ctx, e) { igt_dynamic_f("%s", e->name) test_fence_busy(i915, ctx, e, HANG | WAIT); } } igt_subtest_with_dynamic("await-hang") { for_each_ctx_engine(i915, ctx, e) { igt_dynamic_f("%s", e->name) test_fence_await(i915, ctx, e, 
HANG); } } igt_subtest_with_dynamic("nb-await-hang") { for_each_ctx_engine(i915, ctx, e) { igt_dynamic_f("%s", e->name) test_fence_await(i915, ctx, e, NONBLOCK | HANG); } } igt_fixture { intel_allocator_multiprocess_stop(); igt_disallow_hang(i915, hang); } } } igt_subtest_group { long ring_size = 0; igt_fixture { ring_size = gem_submission_measure(i915, &ctx->cfg, ALL_ENGINES); igt_info("Ring size: %ld batches\n", ring_size); igt_require(ring_size); gem_require_contexts(i915); } igt_subtest("long-history") test_long_history(i915, ctx, ring_size, 0); igt_subtest("expired-history") test_long_history(i915, ctx, ring_size, EXPIRED); } igt_subtest_group { /* syncobj */ igt_fixture { igt_require(exec_has_fence_array(i915)); igt_assert(has_syncobj(i915)); igt_fork_hang_detector(i915); intel_allocator_multiprocess_start(); } igt_subtest("invalid-fence-array") test_invalid_fence_array(i915); igt_subtest("syncobj-unused-fence") test_syncobj_unused_fence(i915); igt_subtest("syncobj-invalid-wait") test_syncobj_invalid_wait(i915); igt_subtest("syncobj-invalid-flags") test_syncobj_invalid_flags(i915); igt_subtest("syncobj-signal") test_syncobj_signal(i915); igt_subtest("syncobj-wait") test_syncobj_wait(i915, ctx); igt_subtest("syncobj-export") test_syncobj_export(i915); igt_subtest("syncobj-repeat") test_syncobj_repeat(i915); igt_subtest("syncobj-import") test_syncobj_import(i915); igt_subtest("syncobj-channel") test_syncobj_channel(i915); igt_fixture { intel_allocator_multiprocess_stop(); igt_stop_hang_detector(); } } igt_subtest_group { /* syncobj timeline */ igt_fixture { igt_require(exec_has_timeline_fences(i915)); igt_require(has_syncobj_timeline(i915)); igt_fork_hang_detector(i915); } igt_describe(test_invalid_timeline_fence_array_desc); igt_subtest("invalid-timeline-fence-array") test_invalid_timeline_fence_array(i915); igt_describe(test_syncobj_timeline_unused_fence_desc); igt_subtest("syncobj-timeline-unused-fence") test_syncobj_timeline_unused_fence(i915); igt_describe(test_syncobj_timeline_invalid_wait_desc); igt_subtest("syncobj-timeline-invalid-wait") test_syncobj_timeline_invalid_wait(i915); igt_describe(test_syncobj_timeline_invalid_flags_desc); igt_subtest("syncobj-timeline-invalid-flags") test_syncobj_timeline_invalid_flags(i915); igt_describe(test_syncobj_timeline_signal_desc); igt_subtest("syncobj-timeline-signal") test_syncobj_timeline_signal(i915); igt_describe(test_syncobj_timeline_wait_desc); igt_subtest("syncobj-timeline-wait") test_syncobj_timeline_wait(i915, ctx); igt_describe(test_syncobj_timeline_export_desc); igt_subtest("syncobj-timeline-export") test_syncobj_timeline_export(i915); igt_describe(test_syncobj_timeline_repeat_desc); igt_subtest("syncobj-timeline-repeat") test_syncobj_timeline_repeat(i915); igt_describe(test_syncobj_timeline_multiple_ext_nodes_desc); igt_subtest("syncobj-timeline-multiple-ext-nodes") test_syncobj_timeline_multiple_ext_nodes(i915); igt_subtest_group { /* syncobj timeline engine chaining */ igt_fixture { /* * We need support for MI_ALU on all * engines which seems to be there * only on Gen8+ */ igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8); } igt_subtest("syncobj-timeline-chain-engines") test_syncobj_timeline_chain_engines(i915, &ctx->cfg); igt_subtest("syncobj-stationary-timeline-chain-engines") test_syncobj_stationary_timeline_chain_engines(i915, &ctx->cfg); igt_subtest("syncobj-backward-timeline-chain-engines") test_syncobj_backward_timeline_chain_engines(i915, &ctx->cfg); } igt_fixture { igt_stop_hang_detector(); } } igt_fixture { 
close(i915); } }