Diffstat (limited to 'tests/i915')
-rw-r--r--  tests/i915/gem_bad_address.c | 75
-rw-r--r--  tests/i915/gem_bad_batch.c | 71
-rw-r--r--  tests/i915/gem_bad_blit.c | 114
-rw-r--r--  tests/i915/gem_bad_reloc.c | 209
-rw-r--r--  tests/i915/gem_basic.c | 90
-rw-r--r--  tests/i915/gem_busy.c | 596
-rw-r--r--  tests/i915/gem_caching.c | 279
-rw-r--r--  tests/i915/gem_close.c | 97
-rw-r--r--  tests/i915/gem_close_race.c | 248
-rw-r--r--  tests/i915/gem_concurrent_all.c | 1940
-rw-r--r--  tests/i915/gem_concurrent_blit.c | 8
-rw-r--r--  tests/i915/gem_cpu_reloc.c | 272
-rw-r--r--  tests/i915/gem_create.c | 165
-rw-r--r--  tests/i915/gem_cs_prefetch.c | 149
-rw-r--r--  tests/i915/gem_cs_tlb.c | 159
-rw-r--r--  tests/i915/gem_ctx_bad_destroy.c | 84
-rw-r--r--  tests/i915/gem_ctx_create.c | 388
-rw-r--r--  tests/i915/gem_ctx_exec.c | 220
-rw-r--r--  tests/i915/gem_ctx_isolation.c | 743
-rw-r--r--  tests/i915/gem_ctx_param.c | 315
-rw-r--r--  tests/i915/gem_ctx_switch.c | 327
-rw-r--r--  tests/i915/gem_ctx_thrash.c | 376
-rw-r--r--  tests/i915/gem_double_irq_loop.c | 124
-rw-r--r--  tests/i915/gem_eio.c | 821
-rw-r--r--  tests/i915/gem_evict_alignment.c | 266
-rw-r--r--  tests/i915/gem_evict_everything.c | 291
-rw-r--r--  tests/i915/gem_exec_alignment.c | 212
-rw-r--r--  tests/i915/gem_exec_async.c | 218
-rw-r--r--  tests/i915/gem_exec_await.c | 261
-rw-r--r--  tests/i915/gem_exec_bad_domains.c | 228
-rw-r--r--  tests/i915/gem_exec_basic.c | 142
-rw-r--r--  tests/i915/gem_exec_big.c | 263
-rw-r--r--  tests/i915/gem_exec_blt.c | 396
-rw-r--r--  tests/i915/gem_exec_capture.c | 253
-rw-r--r--  tests/i915/gem_exec_create.c | 158
-rw-r--r--  tests/i915/gem_exec_faulting_reloc.c | 253
-rw-r--r--  tests/i915/gem_exec_fence.c | 1731
-rw-r--r--  tests/i915/gem_exec_flush.c | 691
-rw-r--r--  tests/i915/gem_exec_gttfill.c | 236
-rw-r--r--  tests/i915/gem_exec_latency.c | 721
-rw-r--r--  tests/i915/gem_exec_lut_handle.c | 263
-rw-r--r--  tests/i915/gem_exec_nop.c | 829
-rw-r--r--  tests/i915/gem_exec_parallel.c | 266
-rw-r--r--  tests/i915/gem_exec_params.c | 442
-rw-r--r--  tests/i915/gem_exec_parse.c | 674
-rw-r--r--  tests/i915/gem_exec_reloc.c | 761
-rw-r--r--  tests/i915/gem_exec_reuse.c | 216
-rw-r--r--  tests/i915/gem_exec_schedule.c | 1358
-rw-r--r--  tests/i915/gem_exec_store.c | 350
-rw-r--r--  tests/i915/gem_exec_suspend.c | 292
-rw-r--r--  tests/i915/gem_exec_whisper.c | 566
-rw-r--r--  tests/i915/gem_fd_exhaustion.c | 125
-rw-r--r--  tests/i915/gem_fence_thrash.c | 267
-rw-r--r--  tests/i915/gem_fence_upload.c | 405
-rw-r--r--  tests/i915/gem_fenced_exec_thrash.c | 189
-rw-r--r--  tests/i915/gem_flink_basic.c | 176
-rw-r--r--  tests/i915/gem_flink_race.c | 207
-rw-r--r--  tests/i915/gem_gpgpu_fill.c | 140
-rw-r--r--  tests/i915/gem_gtt_cpu_tlb.c | 106
-rw-r--r--  tests/i915/gem_gtt_hog.c | 180
-rw-r--r--  tests/i915/gem_gtt_speed.c | 510
-rw-r--r--  tests/i915/gem_hang.c | 93
-rw-r--r--  tests/i915/gem_largeobject.c | 90
-rw-r--r--  tests/i915/gem_linear_blits.c | 266
-rw-r--r--  tests/i915/gem_lut_handle.c | 231
-rw-r--r--  tests/i915/gem_madvise.c | 165
-rw-r--r--  tests/i915/gem_media_fill.c | 144
-rw-r--r--  tests/i915/gem_mmap.c | 186
-rw-r--r--  tests/i915/gem_mmap_gtt.c | 901
-rw-r--r--  tests/i915/gem_mmap_offset_exhaustion.c | 91
-rw-r--r--  tests/i915/gem_mmap_wc.c | 492
-rw-r--r--  tests/i915/gem_mocs_settings.c | 483
-rw-r--r--  tests/i915/gem_non_secure_batch.c | 107
-rw-r--r--  tests/i915/gem_partial_pwrite_pread.c | 278
-rw-r--r--  tests/i915/gem_persistent_relocs.c | 360
-rw-r--r--  tests/i915/gem_pipe_control_store_loop.c | 195
-rw-r--r--  tests/i915/gem_ppgtt.c | 339
-rw-r--r--  tests/i915/gem_pread.c | 248
-rw-r--r--  tests/i915/gem_pread_after_blit.c | 252
-rw-r--r--  tests/i915/gem_pwrite.c | 358
-rw-r--r--  tests/i915/gem_pwrite_pread.c | 410
-rw-r--r--  tests/i915/gem_pwrite_snooped.c | 142
-rw-r--r--  tests/i915/gem_read_read_speed.c | 213
-rw-r--r--  tests/i915/gem_readwrite.c | 146
-rw-r--r--  tests/i915/gem_reg_read.c | 175
-rw-r--r--  tests/i915/gem_reloc_overflow.c | 436
-rw-r--r--  tests/i915/gem_reloc_vs_gpu.c | 350
-rw-r--r--  tests/i915/gem_render_copy.c | 566
-rw-r--r--  tests/i915/gem_render_copy_redux.c | 244
-rw-r--r--  tests/i915/gem_render_linear_blits.c | 222
-rw-r--r--  tests/i915/gem_render_tiled_blits.c | 237
-rw-r--r--  tests/i915/gem_request_retire.c | 238
-rw-r--r--  tests/i915/gem_reset_stats.c | 854
-rw-r--r--  tests/i915/gem_ring_sync_copy.c | 374
-rw-r--r--  tests/i915/gem_ring_sync_loop.c | 105
-rw-r--r--  tests/i915/gem_ringfill.c | 299
-rw-r--r--  tests/i915/gem_set_tiling_vs_blt.c | 266
-rw-r--r--  tests/i915/gem_set_tiling_vs_gtt.c | 132
-rw-r--r--  tests/i915/gem_set_tiling_vs_pwrite.c | 96
-rw-r--r--  tests/i915/gem_shrink.c | 442
-rw-r--r--  tests/i915/gem_softpin.c | 517
-rw-r--r--  tests/i915/gem_spin_batch.c | 120
-rw-r--r--  tests/i915/gem_stolen.c | 455
-rw-r--r--  tests/i915/gem_storedw_batches_loop.c | 191
-rw-r--r--  tests/i915/gem_storedw_loop.c | 196
-rw-r--r--  tests/i915/gem_streaming_writes.c | 405
-rw-r--r--  tests/i915/gem_stress.c | 914
-rw-r--r--  tests/i915/gem_sync.c | 1304
-rw-r--r--  tests/i915/gem_threaded_access_tiled.c | 123
-rw-r--r--  tests/i915/gem_tiled_blits.c | 254
-rw-r--r--  tests/i915/gem_tiled_fence_blits.c | 200
-rw-r--r--  tests/i915/gem_tiled_partial_pwrite_pread.c | 311
-rw-r--r--  tests/i915/gem_tiled_pread_basic.c | 223
-rw-r--r--  tests/i915/gem_tiled_pread_pwrite.c | 155
-rw-r--r--  tests/i915/gem_tiled_swapping.c | 239
-rw-r--r--  tests/i915/gem_tiled_wb.c | 238
-rw-r--r--  tests/i915/gem_tiled_wc.c | 236
-rw-r--r--  tests/i915/gem_tiling_max_stride.c | 137
-rw-r--r--  tests/i915/gem_unfence_active_buffers.c | 161
-rw-r--r--  tests/i915/gem_unref_active_buffers.c | 97
-rw-r--r--  tests/i915/gem_userptr_blits.c | 2037
-rw-r--r--  tests/i915/gem_wait.c | 265
-rw-r--r--  tests/i915/gem_workarounds.c | 290
-rw-r--r--  tests/i915/gem_write_read_ring_switch.c | 193
-rw-r--r--  tests/i915/gen3_mixed_blits.c | 521
-rw-r--r--  tests/i915/gen3_render_linear_blits.c | 388
-rw-r--r--  tests/i915/gen3_render_mixed_blits.c | 415
-rw-r--r--  tests/i915/gen3_render_tiledx_blits.c | 394
-rw-r--r--  tests/i915/gen3_render_tiledy_blits.c | 401
-rw-r--r--  tests/i915/query.c | 528
130 files changed, 45646 insertions, 0 deletions
diff --git a/tests/i915/gem_bad_address.c b/tests/i915/gem_bad_address.c
new file mode 100644
index 00000000..a970dfa4
--- /dev/null
+++ b/tests/i915/gem_bad_address.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Jesse Barnes <jbarnes@virtuousgeek.org> (based on gem_bad_blit.c)
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+#define BAD_GTT_DEST ((512*1024*1024)) /* past end of aperture */
+
+static void
+bad_store(void)
+{
+ BEGIN_BATCH(4, 0);
+ OUT_BATCH(MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL | 1 << 21);
+ OUT_BATCH(0);
+ OUT_BATCH(BAD_GTT_DEST);
+ OUT_BATCH(0xdeadbeef);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_flush(batch);
+}
+
+igt_simple_main
+{
+ int fd;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+ bad_store();
+
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+}
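
For reference, the bad_store() batch above is only four dwords plus a terminator. Below is a minimal sketch, not part of the patch, of the same packet assembled by hand into a plain array that could be uploaded with gem_write(); it reuses the macro names from igt.h that the test uses, and the dword layout (command, reserved dword, address, data) is assumed to be the pre-gen8 MI_STORE_DWORD_IMM form.

#include <stdint.h>
#include <stddef.h>

#include "igt.h"	/* MI_STORE_DWORD_IMM, MI_MEM_VIRTUAL, MI_BATCH_BUFFER_END, MI_NOOP */

/* Sketch: assemble the same "store 0xdeadbeef to a bad GTT address" batch
 * as bad_store(), but into a plain dword array. The 1 << 21 flag is copied
 * verbatim from the test; the four-dword layout is an assumption for
 * pre-gen8 hardware. */
static size_t build_bad_store(uint32_t *batch, uint32_t bad_addr)
{
	size_t i = 0;

	batch[i++] = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL | 1 << 21;
	batch[i++] = 0;			/* reserved on pre-gen8 */
	batch[i++] = bad_addr;		/* destination past the end of the aperture */
	batch[i++] = 0xdeadbeef;	/* value the GPU would try to store */
	batch[i++] = MI_BATCH_BUFFER_END;
	batch[i++] = MI_NOOP;		/* pad to an even number of dwords */

	return i * sizeof(uint32_t);
}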
diff --git a/tests/i915/gem_bad_batch.c b/tests/i915/gem_bad_batch.c
new file mode 100644
index 00000000..a11e1918
--- /dev/null
+++ b/tests/i915/gem_bad_batch.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Jesse Barnes <jbarnes@virtuousgeek.org> (based on gem_bad_blit.c)
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+static void
+bad_batch(void)
+{
+ BEGIN_BATCH(2, 0);
+ OUT_BATCH(MI_BATCH_BUFFER_START);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_flush(batch);
+}
+
+igt_simple_main
+{
+ int fd;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+ bad_batch();
+
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+}
diff --git a/tests/i915/gem_bad_blit.c b/tests/i915/gem_bad_blit.c
new file mode 100644
index 00000000..45dfc809
--- /dev/null
+++ b/tests/i915/gem_bad_blit.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_bad_blit.c
+ *
+ * This is a test of submitting a blit whose destination address points
+ * past the end of the GTT aperture.
+ *
+ * The goal is to exercise the handling of a bad blit target:
+ * - The destination (BAD_GTT_DEST) is a raw address beyond the aperture,
+ *   not a relocated buffer object.
+ * - The source is a small, valid buffer object.
+ *
+ * The model is to emit a single XY_SRC_COPY_BLT from the valid source to
+ * the bogus destination and flush the batch. The test passes as long as
+ * the kernel and GPU survive the submission; no data is read back or
+ * verified afterwards.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+#define BAD_GTT_DEST (256*1024*1024ULL) /* past end of aperture */
+
+static void
+bad_blit(drm_intel_bo *src_bo, uint32_t devid)
+{
+ uint32_t src_pitch = 512, dst_pitch = 512;
+ uint32_t cmd_bits = 0;
+
+ if (IS_965(devid)) {
+ src_pitch /= 4;
+ cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
+ }
+
+ if (IS_965(devid)) {
+ dst_pitch /= 4;
+ cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
+ }
+
+ BLIT_COPY_BATCH_START(cmd_bits);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ dst_pitch);
+ OUT_BATCH(0); /* dst x1,y1 */
+ OUT_BATCH((64 << 16) | 64); /* 64x64 blit */
+ OUT_BATCH(BAD_GTT_DEST);
+ if (batch->gen >= 8)
+ OUT_BATCH(BAD_GTT_DEST >> 32); /* Upper 32 bits */
+ OUT_BATCH(0); /* src x1,y1 */
+ OUT_BATCH(src_pitch);
+ OUT_RELOC_FENCED(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_flush(batch);
+}
+
+igt_simple_main
+{
+ drm_intel_bo *src;
+ int fd;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+ src = drm_intel_bo_alloc(bufmgr, "src", 128 * 128, 4096);
+
+ bad_blit(src, batch->devid);
+
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+}
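
Both this test and gem_bad_address.c lean on the comment that BAD_GTT_DEST lies "past end of aperture". A minimal sketch, separate from the patch, of how that assumption could be checked against the kernel's own report via the I915_GEM_GET_APERTURE ioctl; the header path and reading aper_size as the aperture limit are assumptions here.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>

#include <drm/i915_drm.h>	/* assumed header path for the i915 uapi */

/* Returns 1 if the address lies beyond the aperture the kernel reports,
 * 0 if not, and -1 if the query itself fails. */
static int address_is_past_aperture(int fd, uint64_t addr)
{
	struct drm_i915_gem_get_aperture aperture;

	memset(&aperture, 0, sizeof(aperture));
	if (ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture))
		return -1;

	return addr >= aperture.aper_size;
}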
diff --git a/tests/i915/gem_bad_reloc.c b/tests/i915/gem_bad_reloc.c
new file mode 100644
index 00000000..7624cd8e
--- /dev/null
+++ b/tests/i915/gem_bad_reloc.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION("Simulates SNA behaviour using negative self-relocations"
+ " for STATE_BASE_ADDRESS command packets.");
+
+#define USE_LUT (1 << 12)
+#define BIAS (256*1024)
+
+/* Simulates SNA behaviour using negative self-relocations for
+ * STATE_BASE_ADDRESS command packets. If they wrap around (to values greater
+ * than the total size of the GTT), the GPU will hang.
+ * See https://bugs.freedesktop.org/show_bug.cgi?id=78533
+ */
+static void negative_reloc(int fd, unsigned engine, unsigned flags)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_relocation_entry reloc[1000];
+ uint64_t gtt_max = gem_aperture_size(fd);
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+ uint64_t *offsets;
+ int i;
+
+ gem_require_ring(fd, engine);
+ igt_require(intel_gen(intel_get_drm_devid(fd)) >= 7);
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 8192);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = (uintptr_t)&obj;
+ execbuf.buffer_count = 1;
+ execbuf.flags = engine | (flags & USE_LUT);
+ igt_require(__gem_execbuf(fd, &execbuf) == 0);
+
+ igt_info("Found offset %lld for 4k batch\n", (long long)obj.offset);
+ /*
+ * Ideally we'd like to be able to control where the kernel is going to
+ * place the buffer. We don't SKIP here because it causes the test
+ * to "randomly" flip-flop between the SKIP and PASS states.
+ */
+ if (obj.offset < BIAS) {
+ igt_info("Offset is below BIAS, not testing anything\n");
+ return;
+ }
+
+ memset(reloc, 0, sizeof(reloc));
+ for (i = 0; i < ARRAY_SIZE(reloc); i++) {
+ reloc[i].offset = 8 + 8*i;
+ reloc[i].delta = -BIAS*i/1024;
+ reloc[i].presumed_offset = -1;
+ reloc[i].target_handle = flags & USE_LUT ? 0 : obj.handle;
+ reloc[i].read_domains = I915_GEM_DOMAIN_COMMAND;
+ }
+ obj.relocation_count = i;
+ obj.relocs_ptr = (uintptr_t)reloc;
+ gem_execbuf(fd, &execbuf);
+
+ igt_info("Batch is now at offset %#llx, max GTT %#llx\n",
+ (long long)obj.offset, (long long)gtt_max);
+
+ offsets = gem_mmap__cpu(fd, obj.handle, 0, 8192, PROT_READ);
+ gem_set_domain(fd, obj.handle, I915_GEM_DOMAIN_CPU, 0);
+ gem_close(fd, obj.handle);
+
+ for (i = 0; i < ARRAY_SIZE(reloc); i++)
+ igt_assert_f(offsets[1 + i] < gtt_max,
+ "Offset[%d]=%#llx, expected less than %#llx\n",
+ i, (long long)offsets[1 + i], (long long)gtt_max);
+ munmap(offsets, 8192);
+}
+
+static void negative_reloc_blt(int fd)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[1024][2];
+ struct drm_i915_gem_relocation_entry reloc;
+ uint32_t buf[1024], *b;
+ int i;
+
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.offset = 4 * sizeof(uint32_t);
+ reloc.presumed_offset = ~0ULL;
+ reloc.delta = -4096;
+ reloc.target_handle = 0;
+ reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+
+ for (i = 0; i < 1024; i++) {
+ memset(obj[i], 0, sizeof(obj[i]));
+
+ obj[i][0].handle = gem_create(fd, 4096);
+ obj[i][0].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+ b = buf;
+ *b++ = XY_COLOR_BLT_CMD_NOLEN |
+ ((gen >= 8) ? 5 : 4) |
+ COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
+ *b++ = 0xf0 << 16 | 1 << 25 | 1 << 24 | 4096;
+ *b++ = 1 << 16 | 0;
+ *b++ = 2 << 16 | 1024;
+ *b++ = ~0;
+ if (gen >= 8)
+ *b++ = ~0;
+ *b++ = 0xc0ffee ^ i;
+ *b++ = MI_BATCH_BUFFER_END;
+ if ((b - buf) & 1)
+ *b++ = 0;
+
+ obj[i][1].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[i][1].handle, 0, buf, (b - buf) * sizeof(uint32_t));
+ obj[i][1].relocation_count = 1;
+ obj[i][1].relocs_ptr = (uintptr_t)&reloc;
+ }
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffer_count = 2;
+ execbuf.batch_len = (b - buf) * sizeof(uint32_t);
+ execbuf.flags = USE_LUT;
+ if (gen >= 6)
+ execbuf.flags |= I915_EXEC_BLT;
+
+ for (i = 0; i < 1024; i++) {
+ execbuf.buffers_ptr = (uintptr_t)obj[i];
+ gem_execbuf(fd, &execbuf);
+ }
+
+ for (i = 1024; i--;) {
+ gem_read(fd, obj[i][0].handle,
+ i*sizeof(uint32_t), buf + i, sizeof(uint32_t));
+ gem_close(fd, obj[i][0].handle);
+ gem_close(fd, obj[i][1].handle);
+ }
+
+ if (0) {
+ for (i = 0; i < 1024; i += 8)
+ igt_info("%08x %08x %08x %08x %08x %08x %08x %08x\n",
+ buf[i + 0], buf[i + 1], buf[i + 2], buf[i + 3],
+ buf[i + 4], buf[i + 5], buf[i + 6], buf[i + 7]);
+ }
+ for (i = 0; i < 1024; i++)
+ igt_assert_eq(buf[i], 0xc0ffee ^ i);
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ int fd = -1;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_f("negative-reloc-%s", e->name)
+ negative_reloc(fd, e->exec_id | e->flags, 0);
+
+ igt_subtest_f("negative-reloc-lut-%s", e->name)
+ negative_reloc(fd, e->exec_id | e->flags, USE_LUT);
+ }
+
+ igt_subtest("negative-reloc-bltcopy")
+ negative_reloc_blt(fd);
+
+ igt_fixture
+ close(fd);
+}
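
The negative-reloc subtests above hinge on how a relocation is applied: the dword at reloc.offset in the batch is rewritten to the target object's GPU offset plus the signed delta, so a sufficiently negative delta wraps to a value above the GTT limit. A small sketch of that arithmetic and the wrap check the test's final assertion performs; this is a simplification and not part of the patch (the real relocation path also handles 64-bit slots).

#include <stdint.h>

/* Compute the value the relocation pass conceptually writes into the batch
 * (target offset plus signed delta) and report whether it wrapped past the
 * end of the GTT, which is what negative_reloc() asserts against. */
static int reloc_wraps_gtt(uint64_t target_offset, int32_t delta, uint64_t gtt_size)
{
	/* A delta more negative than the offset wraps around to a huge
	 * unsigned value, i.e. lands above the GTT limit. */
	uint64_t written = target_offset + (int64_t)delta;

	return written >= gtt_size;
}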
diff --git a/tests/i915/gem_basic.c b/tests/i915/gem_basic.c
new file mode 100644
index 00000000..9f7412f2
--- /dev/null
+++ b/tests/i915/gem_basic.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+static void
+test_bad_close(int fd)
+{
+ struct drm_gem_close close_bo;
+ int ret;
+
+ igt_info("Testing error return on bad close ioctl.\n");
+
+ close_bo.handle = 0x10101010;
+ ret = ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
+
+ igt_assert(ret == -1 && errno == EINVAL);
+}
+
+static void
+test_create_close(int fd)
+{
+ uint32_t handle;
+
+ igt_info("Testing creating and closing an object.\n");
+
+ handle = gem_create(fd, 16*1024);
+
+ gem_close(fd, handle);
+}
+
+static void
+test_create_fd_close(int fd)
+{
+ igt_info("Testing closing with an object allocated.\n");
+
+ gem_create(fd, 16*1024);
+ /* leak it */
+
+ close(fd);
+}
+
+int fd;
+
+igt_main
+{
+ igt_fixture
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ igt_subtest("bad-close")
+ test_bad_close(fd);
+ igt_subtest("create-close")
+ test_create_close(fd);
+ igt_subtest("create-fd-close")
+ test_create_fd_close(fd);
+}
diff --git a/tests/i915/gem_busy.c b/tests/i915/gem_busy.c
new file mode 100644
index 00000000..76b44a5d
--- /dev/null
+++ b/tests/i915/gem_busy.c
@@ -0,0 +1,596 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <sched.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+
+#include "igt.h"
+#include "igt_rand.h"
+#include "igt_vgem.h"
+#include "i915/gem_ring.h"
+
+#define LOCAL_EXEC_NO_RELOC (1<<11)
+#define PAGE_ALIGN(x) ALIGN(x, 4096)
+
+/* Exercise the busy-ioctl, ensuring the ABI is never broken */
+IGT_TEST_DESCRIPTION("Basic check of busy-ioctl ABI.");
+
+enum { TEST = 0, BUSY, BATCH };
+
+static bool gem_busy(int fd, uint32_t handle)
+{
+ struct drm_i915_gem_busy busy;
+
+ memset(&busy, 0, sizeof(busy));
+ busy.handle = handle;
+
+ do_ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
+
+ return busy.busy != 0;
+}
+
+static void __gem_busy(int fd,
+ uint32_t handle,
+ uint32_t *read,
+ uint32_t *write)
+{
+ struct drm_i915_gem_busy busy;
+
+ memset(&busy, 0, sizeof(busy));
+ busy.handle = handle;
+
+ do_ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
+
+ *write = busy.busy & 0xffff;
+ *read = busy.busy >> 16;
+}
+
+static bool exec_noop(int fd,
+ uint32_t *handles,
+ unsigned ring,
+ bool write)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 exec[3];
+
+ memset(exec, 0, sizeof(exec));
+ exec[0].handle = handles[BUSY];
+ exec[1].handle = handles[TEST];
+ if (write)
+ exec[1].flags |= EXEC_OBJECT_WRITE;
+ exec[2].handle = handles[BATCH];
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(exec);
+ execbuf.buffer_count = 3;
+ execbuf.flags = ring;
+ igt_debug("Queuing handle for %s on ring %d\n",
+ write ? "writing" : "reading", ring & 0x7);
+ return __gem_execbuf(fd, &execbuf) == 0;
+}
+
+static bool still_busy(int fd, uint32_t handle)
+{
+ uint32_t read, write;
+ __gem_busy(fd, handle, &read, &write);
+ return write;
+}
+
+static void semaphore(int fd, unsigned ring, uint32_t flags)
+{
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+ igt_spin_t *spin;
+ uint32_t handle[3];
+ uint32_t read, write;
+ uint32_t active;
+ unsigned i;
+
+ gem_require_ring(fd, ring | flags);
+
+ handle[TEST] = gem_create(fd, 4096);
+ handle[BATCH] = gem_create(fd, 4096);
+ gem_write(fd, handle[BATCH], 0, &bbe, sizeof(bbe));
+
+ /* Create a long running batch which we can use to hog the GPU */
+ handle[BUSY] = gem_create(fd, 4096);
+ spin = igt_spin_batch_new(fd,
+ .engine = ring,
+ .dependency = handle[BUSY]);
+
+ /* Queue a batch after the busy, it should block and remain "busy" */
+ igt_assert(exec_noop(fd, handle, ring | flags, false));
+ igt_assert(still_busy(fd, handle[BUSY]));
+ __gem_busy(fd, handle[TEST], &read, &write);
+ igt_assert_eq(read, 1 << ring);
+ igt_assert_eq(write, 0);
+
+ /* Requeue with a write */
+ igt_assert(exec_noop(fd, handle, ring | flags, true));
+ igt_assert(still_busy(fd, handle[BUSY]));
+ __gem_busy(fd, handle[TEST], &read, &write);
+ igt_assert_eq(read, 1 << ring);
+ igt_assert_eq(write, ring);
+
+ /* Now queue it for a read across all available rings */
+ active = 0;
+ for (i = I915_EXEC_RENDER; i <= I915_EXEC_VEBOX; i++) {
+ if (exec_noop(fd, handle, i | flags, false))
+ active |= 1 << i;
+ }
+ igt_assert(still_busy(fd, handle[BUSY]));
+ __gem_busy(fd, handle[TEST], &read, &write);
+ igt_assert_eq(read, active);
+ igt_assert_eq(write, ring); /* from the earlier write */
+
+ /* Check that our long batch was long enough */
+ igt_assert(still_busy(fd, handle[BUSY]));
+ igt_spin_batch_free(fd, spin);
+
+ /* And make sure it becomes idle again */
+ gem_sync(fd, handle[TEST]);
+ __gem_busy(fd, handle[TEST], &read, &write);
+ igt_assert_eq(read, 0);
+ igt_assert_eq(write, 0);
+
+ for (i = TEST; i <= BATCH; i++)
+ gem_close(fd, handle[i]);
+}
+
+#define PARALLEL 1
+#define HANG 2
+static void one(int fd, unsigned ring, unsigned test_flags)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 obj[2];
+#define SCRATCH 0
+#define BATCH 1
+ struct drm_i915_gem_relocation_entry store[1024+1];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ unsigned size = ALIGN(ARRAY_SIZE(store)*16 + 4, 4096);
+ const unsigned uabi = ring & 63;
+ uint32_t read[2], write[2];
+ struct timespec tv;
+ uint32_t *batch, *bbe;
+ int i, count, timeout;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags = ring;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ memset(obj, 0, sizeof(obj));
+ obj[SCRATCH].handle = gem_create(fd, 4096);
+
+ obj[BATCH].handle = gem_create(fd, size);
+ obj[BATCH].relocs_ptr = to_user_pointer(store);
+ obj[BATCH].relocation_count = ARRAY_SIZE(store);
+ memset(store, 0, sizeof(store));
+
+ batch = gem_mmap__wc(fd, obj[BATCH].handle, 0, size, PROT_WRITE);
+ gem_set_domain(fd, obj[BATCH].handle,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+ i = 0;
+ for (count = 0; count < 1024; count++) {
+ store[count].target_handle = obj[SCRATCH].handle;
+ store[count].presumed_offset = -1;
+ store[count].offset = sizeof(uint32_t) * (i + 1);
+ store[count].delta = sizeof(uint32_t) * count;
+ store[count].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ store[count].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = 0;
+ store[count].offset += sizeof(uint32_t);
+ } else {
+ batch[i]--;
+ batch[++i] = 0;
+ }
+ batch[++i] = count;
+ i++;
+ }
+
+ bbe = &batch[i];
+ store[count].target_handle = obj[BATCH].handle; /* recurse */
+ store[count].presumed_offset = 0;
+ store[count].offset = sizeof(uint32_t) * (i + 1);
+ store[count].delta = 0;
+ store[count].read_domains = I915_GEM_DOMAIN_COMMAND;
+ store[count].write_domain = 0;
+ batch[i] = MI_BATCH_BUFFER_START;
+ if (gen >= 8) {
+ batch[i] |= 1 << 8 | 1;
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else if (gen >= 6) {
+ batch[i] |= 1 << 8;
+ batch[++i] = 0;
+ } else {
+ batch[i] |= 2 << 6;
+ batch[++i] = 0;
+ if (gen < 4) {
+ batch[i] |= 1;
+ store[count].delta = 1;
+ }
+ }
+ i++;
+
+ igt_assert(i < size/sizeof(*batch));
+ igt_require(__gem_execbuf(fd, &execbuf) == 0);
+
+ __gem_busy(fd, obj[SCRATCH].handle, &read[SCRATCH], &write[SCRATCH]);
+ __gem_busy(fd, obj[BATCH].handle, &read[BATCH], &write[BATCH]);
+
+ if (test_flags & PARALLEL) {
+ unsigned other;
+
+ for_each_physical_engine(fd, other) {
+ if (other == ring)
+ continue;
+
+ if (!gem_can_store_dword(fd, other))
+ continue;
+
+ igt_debug("Testing %s in parallel\n", e__->name);
+ one(fd, other, 0);
+ }
+ }
+
+ timeout = 120;
+ if ((test_flags & HANG) == 0) {
+ *bbe = MI_BATCH_BUFFER_END;
+ __sync_synchronize();
+ timeout = 1;
+ }
+
+ igt_assert_eq(write[SCRATCH], uabi);
+ igt_assert_eq_u32(read[SCRATCH], 1 << uabi);
+
+ igt_assert_eq(write[BATCH], 0);
+ igt_assert_eq_u32(read[BATCH], 1 << uabi);
+
+ /* Calling busy in a loop should be enough to flush the rendering */
+ memset(&tv, 0, sizeof(tv));
+ while (gem_busy(fd, obj[BATCH].handle))
+ igt_assert(igt_seconds_elapsed(&tv) < timeout);
+ igt_assert(!gem_busy(fd, obj[SCRATCH].handle));
+
+ munmap(batch, size);
+ batch = gem_mmap__wc(fd, obj[SCRATCH].handle, 0, 4096, PROT_READ);
+ for (i = 0; i < 1024; i++)
+ igt_assert_eq_u32(batch[i], i);
+ munmap(batch, 4096);
+
+ gem_close(fd, obj[BATCH].handle);
+ gem_close(fd, obj[SCRATCH].handle);
+}
+
+static void xchg_u32(void *array, unsigned i, unsigned j)
+{
+ uint32_t *u32 = array;
+ uint32_t tmp = u32[i];
+ u32[i] = u32[j];
+ u32[j] = tmp;
+}
+
+static void close_race(int fd)
+{
+ const unsigned int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ const unsigned int nhandles = gem_measure_ring_inflight(fd, ALL_ENGINES, 0) / 2;
+ unsigned int engines[16], nengine;
+ unsigned long *control;
+ uint32_t *handles;
+ int i;
+
+ igt_require(ncpus > 1);
+ intel_require_memory(nhandles, 4096, CHECK_RAM);
+
+ /*
+ * One thread spawning work and randomly closing handles.
+ * One background thread per cpu checking busyness.
+ */
+
+ nengine = 0;
+ for_each_engine(fd, i)
+ engines[nengine++] = i;
+ igt_require(nengine);
+
+ control = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(control != MAP_FAILED);
+
+ handles = mmap(NULL, PAGE_ALIGN(nhandles*sizeof(*handles)),
+ PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(handles != MAP_FAILED);
+
+ igt_fork(child, ncpus - 1) {
+ struct drm_i915_gem_busy busy;
+ uint32_t indirection[nhandles];
+ unsigned long count = 0;
+
+ for (i = 0; i < nhandles; i++)
+ indirection[i] = i;
+
+ hars_petruska_f54_1_random_perturb(child);
+
+ memset(&busy, 0, sizeof(busy));
+ do {
+ igt_permute_array(indirection, nhandles, xchg_u32);
+ __sync_synchronize();
+ for (i = 0; i < nhandles; i++) {
+ busy.handle = handles[indirection[i]];
+ /* Check that the busy computation doesn't
+ * explode in the face of random gem_close().
+ */
+ drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
+ }
+ count++;
+ } while(*(volatile long *)control == 0);
+
+ igt_debug("child[%d]: count = %lu\n", child, count);
+ control[child + 1] = count;
+ }
+
+ igt_fork(child, 1) {
+ struct sched_param rt = {.sched_priority = 99 };
+ igt_spin_t *spin[nhandles];
+ unsigned long count = 0;
+
+ igt_assert(sched_setscheduler(getpid(), SCHED_RR, &rt) == 0);
+
+ for (i = 0; i < nhandles; i++) {
+ spin[i] = __igt_spin_batch_new(fd,
+ .engine = engines[rand() % nengine]);
+ handles[i] = spin[i]->handle;
+ }
+
+ igt_until_timeout(20) {
+ for (i = 0; i < nhandles; i++) {
+ igt_spin_batch_free(fd, spin[i]);
+ spin[i] = __igt_spin_batch_new(fd,
+ .engine = engines[rand() % nengine]);
+ handles[i] = spin[i]->handle;
+ __sync_synchronize();
+ }
+ count += nhandles;
+ }
+ control[0] = count;
+ __sync_synchronize();
+
+ for (i = 0; i < nhandles; i++)
+ igt_spin_batch_free(fd, spin[i]);
+ }
+ igt_waitchildren();
+
+ for (i = 0; i < ncpus - 1; i++)
+ control[ncpus] += control[i + 1];
+ igt_info("Total execs %lu, busy-ioctls %lu\n",
+ control[0], control[ncpus] * nhandles);
+
+ munmap(handles, PAGE_ALIGN(nhandles * sizeof(*handles)));
+ munmap(control, 4096);
+
+ gem_quiescent_gpu(fd);
+}
+
+static bool has_semaphores(int fd)
+{
+ struct drm_i915_getparam gp;
+ int val = -1;
+
+ memset(&gp, 0, sizeof(gp));
+ gp.param = I915_PARAM_HAS_SEMAPHORES;
+ gp.value = &val;
+
+ drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+ errno = 0;
+
+ return val > 0;
+}
+
+static bool has_extended_busy_ioctl(int fd)
+{
+ igt_spin_t *spin = igt_spin_batch_new(fd, .engine = I915_EXEC_RENDER);
+ uint32_t read, write;
+
+ __gem_busy(fd, spin->handle, &read, &write);
+ igt_spin_batch_free(fd, spin);
+
+ return read != 0;
+}
+
+static void basic(int fd, unsigned ring, unsigned flags)
+{
+ igt_spin_t *spin = igt_spin_batch_new(fd, .engine = ring);
+ struct timespec tv;
+ int timeout;
+ bool busy;
+
+ busy = gem_bo_busy(fd, spin->handle);
+
+ timeout = 120;
+ if ((flags & HANG) == 0) {
+ igt_spin_batch_end(spin);
+ timeout = 1;
+ }
+
+ igt_assert(busy);
+ memset(&tv, 0, sizeof(tv));
+ while (gem_bo_busy(fd, spin->handle)) {
+ if (igt_seconds_elapsed(&tv) > timeout) {
+ igt_debugfs_dump(fd, "i915_engine_info");
+ igt_debugfs_dump(fd, "i915_hangcheck_info");
+ igt_assert_f(igt_seconds_elapsed(&tv) < timeout,
+ "%s batch did not complete within %ds\n",
+ flags & HANG ? "Hanging" : "Normal",
+ timeout);
+ }
+ }
+
+ igt_spin_batch_free(fd, spin);
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ int fd = -1;
+
+ igt_fixture {
+ fd = drm_open_driver_master(DRIVER_INTEL);
+ igt_require_gem(fd);
+ igt_require(gem_can_store_dword(fd, 0));
+ }
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_fork_hang_detector(fd);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_group {
+ igt_subtest_f("%sbusy-%s",
+ e->exec_id == 0 ? "basic-" : "",
+ e->name) {
+ igt_require(gem_has_ring(fd, e->exec_id | e->flags));
+ gem_quiescent_gpu(fd);
+ basic(fd, e->exec_id | e->flags, 0);
+ }
+ }
+ }
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(has_extended_busy_ioctl(fd));
+ gem_require_mmap_wc(fd);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ /* default exec-id is purely symbolic */
+ if (e->exec_id == 0)
+ continue;
+
+ igt_subtest_f("extended-%s", e->name) {
+ igt_require(gem_ring_has_physical_engine(fd, e->exec_id | e->flags));
+ igt_require(gem_can_store_dword(fd, e->exec_id | e->flags));
+ gem_quiescent_gpu(fd);
+ one(fd, e->exec_id | e->flags, 0);
+ gem_quiescent_gpu(fd);
+ }
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ /* default exec-id is purely symbolic */
+ if (e->exec_id == 0)
+ continue;
+
+ igt_subtest_f("extended-parallel-%s", e->name) {
+ igt_require(gem_ring_has_physical_engine(fd, e->exec_id | e->flags));
+ igt_require(gem_can_store_dword(fd, e->exec_id | e->flags));
+
+ gem_quiescent_gpu(fd);
+ one(fd, e->exec_id | e->flags, PARALLEL);
+ gem_quiescent_gpu(fd);
+ }
+ }
+ }
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(has_extended_busy_ioctl(fd));
+ igt_require(has_semaphores(fd));
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ /* default exec-id is purely symbolic */
+ if (e->exec_id == 0)
+ continue;
+
+ igt_subtest_f("extended-semaphore-%s", e->name)
+ semaphore(fd, e->exec_id, e->flags);
+ }
+ }
+
+ igt_subtest("close-race")
+ close_race(fd);
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ }
+ }
+
+ igt_subtest_group {
+ igt_hang_t hang;
+
+ igt_fixture {
+ hang = igt_allow_hang(fd, 0, 0);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_f("%shang-%s",
+ e->exec_id == 0 ? "basic-" : "",
+ e->name) {
+ igt_skip_on_simulation();
+ igt_require(gem_has_ring(fd, e->exec_id | e->flags));
+ gem_quiescent_gpu(fd);
+ basic(fd, e->exec_id | e->flags, HANG);
+ }
+ }
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(has_extended_busy_ioctl(fd));
+ gem_require_mmap_wc(fd);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ /* default exec-id is purely symbolic */
+ if (e->exec_id == 0)
+ continue;
+
+ igt_subtest_f("extended-hang-%s", e->name) {
+ igt_skip_on_simulation();
+ igt_require(gem_ring_has_physical_engine(fd, e->exec_id | e->flags));
+ igt_require(gem_can_store_dword(fd, e->exec_id | e->flags));
+
+ gem_quiescent_gpu(fd);
+ one(fd, e->exec_id | e->flags, HANG);
+ gem_quiescent_gpu(fd);
+ }
+ }
+ }
+
+ igt_fixture {
+ igt_disallow_hang(fd, hang);
+ }
+ }
+
+ igt_fixture {
+ close(fd);
+ }
+}
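
For completeness, the busy value that __gem_busy() splits apart near the top of this file encodes the last writer in the low 16 bits and a bitmask of current readers in the high 16 bits. A minimal sketch, mirroring the helper's interpretation rather than quoting the uapi documentation:

#include <stdint.h>
#include <stdio.h>

/* Decode drm_i915_gem_busy.busy the same way __gem_busy() does: the low
 * 16 bits name the engine that last wrote the object (0 when there is no
 * writer), each set bit of the high 16 bits marks an engine still reading. */
static void decode_busy(uint32_t busy)
{
	uint16_t writer = busy & 0xffff;
	uint16_t readers = busy >> 16;

	if (!busy)
		printf("object is idle\n");
	else
		printf("last writer: engine %u, reader mask: %#x\n",
		       writer, readers);
}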
diff --git a/tests/i915/gem_caching.c b/tests/i915/gem_caching.c
new file mode 100644
index 00000000..09e1a5f9
--- /dev/null
+++ b/tests/i915/gem_caching.c
@@ -0,0 +1,279 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("Test snoop consistency when touching partial"
+ " cachelines.");
+
+/*
+ * Testcase: snoop consistency when touching partial cachelines
+ *
+ */
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+drm_intel_bo *scratch_bo;
+drm_intel_bo *staging_bo;
+#define BO_SIZE (4*4096)
+uint32_t devid;
+int fd;
+
+static void
+copy_bo(drm_intel_bo *src, drm_intel_bo *dst)
+{
+ BLIT_COPY_BATCH_START(0);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ 4096);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH((BO_SIZE/4096) << 16 | 1024);
+ OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH(4096);
+ OUT_RELOC_FENCED(src, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_flush(batch);
+}
+
+static void
+blt_bo_fill(drm_intel_bo *tmp_bo, drm_intel_bo *bo, uint8_t val)
+{
+ uint8_t *gtt_ptr;
+ int i;
+
+ do_or_die(drm_intel_gem_bo_map_gtt(tmp_bo));
+ gtt_ptr = tmp_bo->virtual;
+
+ for (i = 0; i < BO_SIZE; i++)
+ gtt_ptr[i] = val;
+
+ drm_intel_gem_bo_unmap_gtt(tmp_bo);
+
+ igt_drop_caches_set(fd, DROP_BOUND);
+
+ copy_bo(tmp_bo, bo);
+}
+
+#define MAX_BLT_SIZE 128
+#define ROUNDS 1000
+#define TEST_READ 0x1
+#define TEST_WRITE 0x2
+#define TEST_BOTH (TEST_READ | TEST_WRITE)
+igt_main
+{
+ unsigned flags = TEST_BOTH;
+ int i, j;
+ uint8_t *cpu_ptr;
+ uint8_t *gtt_ptr;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ srandom(0xdeadbeef);
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ igt_require_gem(fd);
+ gem_require_caching(fd);
+
+ devid = intel_get_drm_devid(fd);
+ if (IS_GEN2(devid)) /* chipset only handles cached -> uncached */
+ flags &= ~TEST_READ;
+ if (IS_BROADWATER(devid) || IS_CRESTLINE(devid)) {
+ /* chipset is completely fubar */
+ igt_info("coherency broken on i965g/gm\n");
+ flags = 0;
+ }
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+ /* over-allocate the buffers we're actually using */
+ scratch_bo = drm_intel_bo_alloc(bufmgr, "scratch bo", BO_SIZE, 4096);
+ gem_set_caching(fd, scratch_bo->handle, 1);
+
+ staging_bo = drm_intel_bo_alloc(bufmgr, "staging bo", BO_SIZE, 4096);
+ }
+
+ igt_subtest("reads") {
+ igt_require(flags & TEST_READ);
+
+ igt_info("checking partial reads\n");
+
+ for (i = 0; i < ROUNDS; i++) {
+ uint8_t val0 = i;
+ int start, len;
+
+ blt_bo_fill(staging_bo, scratch_bo, i);
+
+ start = random() % BO_SIZE;
+ len = random() % (BO_SIZE-start) + 1;
+
+ drm_intel_bo_map(scratch_bo, false);
+ cpu_ptr = scratch_bo->virtual;
+ for (j = 0; j < len; j++) {
+ igt_assert_f(cpu_ptr[j] == val0,
+ "mismatch at %i, got: %i, expected: %i\n",
+ j, cpu_ptr[j], val0);
+ }
+ drm_intel_bo_unmap(scratch_bo);
+
+ igt_progress("partial reads test: ", i, ROUNDS);
+ }
+ }
+
+ igt_subtest("writes") {
+ igt_require(flags & TEST_WRITE);
+
+ igt_info("checking partial writes\n");
+
+ for (i = 0; i < ROUNDS; i++) {
+ uint8_t val0 = i, val1;
+ int start, len;
+
+ blt_bo_fill(staging_bo, scratch_bo, val0);
+
+ start = random() % BO_SIZE;
+ len = random() % (BO_SIZE-start) + 1;
+
+ val1 = val0 + 63;
+ drm_intel_bo_map(scratch_bo, true);
+ cpu_ptr = scratch_bo->virtual;
+ memset(cpu_ptr + start, val1, len);
+ drm_intel_bo_unmap(scratch_bo);
+
+ copy_bo(scratch_bo, staging_bo);
+ do_or_die(drm_intel_gem_bo_map_gtt(staging_bo));
+ gtt_ptr = staging_bo->virtual;
+
+ for (j = 0; j < start; j++) {
+ igt_assert_f(gtt_ptr[j] == val0,
+ "mismatch at %i, partial=[%d+%d] got: %i, expected: %i\n",
+ j, start, len, gtt_ptr[j], val0);
+ }
+ for (; j < start + len; j++) {
+ igt_assert_f(gtt_ptr[j] == val1,
+ "mismatch at %i, partial=[%d+%d] got: %i, expected: %i\n",
+ j, start, len, gtt_ptr[j], val1);
+ }
+ for (; j < BO_SIZE; j++) {
+ igt_assert_f(gtt_ptr[j] == val0,
+ "mismatch at %i, partial=[%d+%d] got: %i, expected: %i\n",
+ j, start, len, gtt_ptr[j], val0);
+ }
+ drm_intel_gem_bo_unmap_gtt(staging_bo);
+
+ igt_progress("partial writes test: ", i, ROUNDS);
+ }
+ }
+
+ igt_subtest("read-writes") {
+ igt_require((flags & TEST_BOTH) == TEST_BOTH);
+
+ igt_info("checking partial writes after partial reads\n");
+
+ for (i = 0; i < ROUNDS; i++) {
+ uint8_t val0 = i, val1, val2;
+ int start, len;
+
+ blt_bo_fill(staging_bo, scratch_bo, val0);
+
+ /* partial read */
+ start = random() % BO_SIZE;
+ len = random() % (BO_SIZE-start) + 1;
+
+ do_or_die(drm_intel_bo_map(scratch_bo, false));
+ cpu_ptr = scratch_bo->virtual;
+ for (j = 0; j < len; j++) {
+ igt_assert_f(cpu_ptr[j] == val0,
+ "mismatch in read at %i, got: %i, expected: %i\n",
+ j, cpu_ptr[j], val0);
+ }
+ drm_intel_bo_unmap(scratch_bo);
+
+ /* Change contents through gtt to make the pread cachelines
+ * stale. */
+ val1 = i + 17;
+ blt_bo_fill(staging_bo, scratch_bo, val1);
+
+ /* partial write */
+ start = random() % BO_SIZE;
+ len = random() % (BO_SIZE-start) + 1;
+
+ val2 = i + 63;
+ do_or_die(drm_intel_bo_map(scratch_bo, false));
+ cpu_ptr = scratch_bo->virtual;
+ memset(cpu_ptr + start, val2, len);
+
+ copy_bo(scratch_bo, staging_bo);
+ do_or_die(drm_intel_gem_bo_map_gtt(staging_bo));
+ gtt_ptr = staging_bo->virtual;
+
+ for (j = 0; j < start; j++) {
+ igt_assert_f(gtt_ptr[j] == val1,
+ "mismatch at %i, partial=[%d+%d] got: %i, expected: %i\n",
+ j, start, len, gtt_ptr[j], val1);
+ }
+ for (; j < start + len; j++) {
+ igt_assert_f(gtt_ptr[j] == val2,
+ "mismatch at %i, partial=[%d+%d] got: %i, expected: %i\n",
+ j, start, len, gtt_ptr[j], val2);
+ }
+ for (; j < BO_SIZE; j++) {
+ igt_assert_f(gtt_ptr[j] == val1,
+ "mismatch at %i, partial=[%d+%d] got: %i, expected: %i\n",
+ j, start, len, gtt_ptr[j], val1);
+ }
+ drm_intel_gem_bo_unmap_gtt(staging_bo);
+ drm_intel_bo_unmap(scratch_bo);
+
+ igt_progress("partial read/writes test: ", i, ROUNDS);
+ }
+ }
+
+ igt_fixture {
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+ }
+}
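
The fixture above relies on gem_set_caching(fd, handle, 1) to make the scratch buffer snooped before the partial read/write checks. A minimal sketch of the underlying ioctl, outside the patch, assuming the I915_CACHING_CACHED name and struct layout from the i915 uapi header:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>

#include <drm/i915_drm.h>	/* assumed header path for the i915 uapi */

/* Equivalent of gem_set_caching(fd, handle, 1): mark a buffer object as
 * snooped so CPU reads/writes stay coherent without explicit clflushes. */
static int set_bo_snooped(int fd, uint32_t handle)
{
	struct drm_i915_gem_caching arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.caching = I915_CACHING_CACHED;	/* caching level 1: snooped */

	return ioctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
}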
diff --git a/tests/i915/gem_close.c b/tests/i915/gem_close.c
new file mode 100644
index 00000000..1d5fa305
--- /dev/null
+++ b/tests/i915/gem_close.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+
+static void test_many_handles(int fd)
+{
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[2];
+ uint32_t clones[1024];
+ uint32_t original;
+
+ original = gem_create(fd, 4096);
+ gem_write(fd, original, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 1;
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = original;
+ gem_execbuf(fd, &execbuf);
+
+ for (int i = 0; i < ARRAY_SIZE(clones); i++) {
+ clones[i] = gem_open(fd, gem_flink(fd, original));
+ obj[0].handle = clones[i];
+ gem_execbuf(fd, &execbuf);
+ }
+
+ /* We do not allow the same object to be referenced multiple times
+ * within an execbuf; hence why this practice of cloning a handle
+ * is only found within test cases.
+ */
+ execbuf.buffer_count = 2;
+ obj[0].handle = original;
+ for (int i = 0; i < ARRAY_SIZE(clones); i++) {
+ obj[1].handle = clones[i];
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+ }
+ /* Any other clone pair should also be detected */
+ obj[1].handle = clones[0]; /* (last, first) */
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+ execbuf.buffer_count = 1;
+
+ /* Now close the original having used every clone */
+ obj[0].handle = original;
+ gem_close(fd, original);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT);
+
+ /* All clones should still be operational */
+ for (int i = 0; i < ARRAY_SIZE(clones); i++) {
+ obj[0].handle = clones[i];
+ gem_execbuf(fd, &execbuf);
+
+ /* ... until closed */
+ gem_close(fd, clones[i]);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT);
+ }
+}
+
+igt_main
+{
+ int fd = -1;
+
+ igt_fixture {
+ /* Creating a flink name requires DRM_AUTH */
+ fd = drm_open_driver_master(DRIVER_INTEL);
+ igt_require_gem(fd);
+ }
+
+ igt_subtest("basic")
+ gem_close(fd, gem_create(fd, 4096));
+
+ igt_subtest("many-handles-one-vma")
+ test_many_handles(fd);
+}
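
The clone handles used by test_many_handles() come from the flink/open round trip: flink publishes a global name for the object, and opening that name returns a fresh handle to the same backing storage. A minimal sketch of that pattern, using the same IGT helpers the test itself calls:

#include <stdint.h>

#include "igt.h"	/* gem_flink() and gem_open(), as used by the test above */

/* Return a second handle that refers to the same object as `handle`;
 * closing either handle leaves the other one usable, which is what the
 * many-handles-one-vma subtest depends on. */
static uint32_t clone_handle(int fd, uint32_t handle)
{
	uint32_t name = gem_flink(fd, handle);

	return gem_open(fd, name);	/* new handle, same object */
}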
diff --git a/tests/i915/gem_close_race.c b/tests/i915/gem_close_race.c
new file mode 100644
index 00000000..11d626dc
--- /dev/null
+++ b/tests/i915/gem_close_race.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "igt.h"
+#include <pthread.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include "drm.h"
+
+#define OBJECT_SIZE (256 * 1024)
+
+#define COPY_BLT_CMD (2<<29|0x53<<22|0x6)
+#define BLT_WRITE_ALPHA (1<<21)
+#define BLT_WRITE_RGB (1<<20)
+
+static uint32_t devid;
+static bool has_64bit_relocations;
+
+#define gettid() syscall(__NR_gettid)
+#define sigev_notify_thread_id _sigev_un._tid
+
+static void selfcopy(int fd, uint32_t handle, int loops)
+{
+ struct drm_i915_gem_relocation_entry reloc[2];
+ struct drm_i915_gem_exec_object2 gem_exec[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_pwrite gem_pwrite;
+ struct drm_i915_gem_create create;
+ uint32_t buf[16], *b = buf;
+
+ memset(reloc, 0, sizeof(reloc));
+
+ *b = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ if (has_64bit_relocations)
+ *b += 2;
+ b++;
+ *b++ = 0xcc << 16 | 1 << 25 | 1 << 24 | (4*1024);
+ *b++ = 0;
+ *b++ = 1 << 16 | 1024;
+
+ reloc[0].offset = (b - buf) * sizeof(*b);
+ reloc[0].target_handle = handle;
+ reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+ *b++ = 0;
+ if (has_64bit_relocations)
+ *b++ = 0;
+
+ *b++ = 512 << 16;
+ *b++ = 4*1024;
+
+ reloc[1].offset = (b - buf) * sizeof(*b);
+ reloc[1].target_handle = handle;
+ reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[1].write_domain = 0;
+ *b++ = 0;
+ if (has_64bit_relocations)
+ *b++ = 0;
+
+ *b++ = MI_BATCH_BUFFER_END;
+ *b++ = 0;
+
+ memset(gem_exec, 0, sizeof(gem_exec));
+ gem_exec[0].handle = handle;
+
+ memset(&create, 0, sizeof(create));
+ create.handle = 0;
+ create.size = 4096;
+ drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
+ gem_exec[1].handle = create.handle;
+ gem_exec[1].relocation_count = 2;
+ gem_exec[1].relocs_ptr = to_user_pointer(reloc);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(gem_exec);
+ execbuf.buffer_count = 2;
+ execbuf.batch_len = (b - buf) * sizeof(*b);
+ if (HAS_BLT_RING(devid))
+ execbuf.flags |= I915_EXEC_BLT;
+
+ memset(&gem_pwrite, 0, sizeof(gem_pwrite));
+ gem_pwrite.handle = create.handle;
+ gem_pwrite.offset = 0;
+ gem_pwrite.size = sizeof(buf);
+ gem_pwrite.data_ptr = to_user_pointer(buf);
+ if (drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &gem_pwrite) == 0) {
+ while (loops-- && __gem_execbuf(fd, &execbuf) == 0)
+ ;
+ }
+
+ drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &create.handle);
+}
+
+static uint32_t load(int fd)
+{
+ uint32_t handle;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+ if (handle == 0)
+ return 0;
+
+ selfcopy(fd, handle, 100);
+ return handle;
+}
+
+static void process(int child)
+{
+ uint32_t handle;
+ int fd;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ handle = load(fd);
+ if ((child & 63) == 63)
+ gem_read(fd, handle, 0, &handle, sizeof(handle));
+
+ gem_quiescent_gpu(fd);
+}
+
+struct crashme {
+ int fd;
+} crashme;
+
+static void crashme_now(int sig)
+{
+ close(crashme.fd);
+}
+
+#define usec(x) (1000*(x))
+#define msec(x) usec(1000*(x))
+
+static void threads(int timeout)
+{
+ struct sigevent sev;
+ struct sigaction act;
+ struct drm_gem_open name;
+ struct itimerspec its;
+ timer_t timer;
+ int fd;
+
+ memset(&act, 0, sizeof(act));
+ act.sa_handler = crashme_now;
+ igt_assert(sigaction(SIGRTMIN, &act, NULL) == 0);
+
+ memset(&sev, 0, sizeof(sev));
+ sev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID;
+ sev.sigev_notify_thread_id = gettid();
+ sev.sigev_signo = SIGRTMIN;
+ igt_assert(timer_create(CLOCK_MONOTONIC, &sev, &timer) == 0);
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ name.name = gem_flink(fd, gem_create(fd, OBJECT_SIZE));
+
+ igt_until_timeout(timeout) {
+ crashme.fd = drm_open_driver(DRIVER_INTEL);
+
+ memset(&its, 0, sizeof(its));
+ its.it_value.tv_nsec = msec(1) + (rand() % msec(10));
+ igt_assert(timer_settime(timer, 0, &its, NULL) == 0);
+
+ do {
+ if (drmIoctl(crashme.fd, DRM_IOCTL_GEM_OPEN, &name))
+ break;
+
+ selfcopy(crashme.fd, name.handle, 100);
+ drmIoctl(crashme.fd, DRM_IOCTL_GEM_CLOSE, &name.handle);
+ } while (1);
+
+ close(crashme.fd);
+ }
+
+ timer_delete(timer);
+
+ gem_quiescent_gpu(fd);
+ close(fd);
+}
+
+igt_main
+{
+ igt_fixture {
+ int fd;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ devid = intel_get_drm_devid(fd);
+ has_64bit_relocations = intel_gen(devid) >= 8;
+
+ igt_fork_hang_detector(fd);
+ close(fd);
+ }
+
+ igt_subtest("basic-process") {
+ igt_fork(child, 1)
+ process(child);
+ igt_waitchildren();
+ }
+
+ igt_subtest("basic-threads")
+ threads(1);
+
+ igt_subtest("process-exit") {
+ igt_fork(child, 768)
+ process(child);
+ igt_waitchildren();
+ }
+
+ igt_subtest("gem-close-race")
+ threads(150);
+
+ igt_fixture
+ igt_stop_hang_detector();
+}
diff --git a/tests/i915/gem_concurrent_all.c b/tests/i915/gem_concurrent_all.c
new file mode 100644
index 00000000..4ac08c1b
--- /dev/null
+++ b/tests/i915/gem_concurrent_all.c
@@ -0,0 +1,1940 @@
+/*
+ * Copyright © 2009,2012,2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/** @file gem_concurrent.c
+ *
+ * This is a test of pread/pwrite/mmap behavior when writing to active
+ * buffers.
+ *
+ * Based on gem_gtt_concurrent_blt.
+ */
+
+#include "igt.h"
+#include "igt_vgem.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+
+#include <drm.h>
+
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("Test of pread/pwrite/mmap behavior when writing to active"
+ " buffers.");
+
+int fd, devid, gen;
+int vgem_drv = -1;
+int all;
+int pass;
+
+struct create {
+ const char *name;
+ void (*require)(const struct create *, unsigned);
+ drm_intel_bo *(*create)(drm_intel_bufmgr *, uint64_t size);
+};
+
+struct size {
+ const char *name;
+ int width, height;
+};
+
+struct buffers {
+ const char *name;
+ const struct create *create;
+ const struct access_mode *mode;
+ const struct size *size;
+ drm_intel_bufmgr *bufmgr;
+ struct intel_batchbuffer *batch;
+ drm_intel_bo **src, **dst;
+ drm_intel_bo *snoop, *spare;
+ uint32_t *tmp;
+ int width, height, npixels, page_size;
+ int count, num_buffers;
+};
+
+#define MIN_BUFFERS 3
+
+static void blt_copy_bo(struct buffers *b, drm_intel_bo *dst, drm_intel_bo *src);
+
+static void
+nop_release_bo(drm_intel_bo *bo)
+{
+ drm_intel_bo_unreference(bo);
+}
+
+static void
+prw_set_bo(struct buffers *b, drm_intel_bo *bo, uint32_t val)
+{
+ for (int i = 0; i < b->npixels; i++)
+ b->tmp[i] = val;
+ drm_intel_bo_subdata(bo, 0, 4*b->npixels, b->tmp);
+}
+
+static void
+prw_cmp_bo(struct buffers *b, drm_intel_bo *bo, uint32_t val)
+{
+ uint32_t *vaddr;
+
+ vaddr = b->tmp;
+ do_or_die(drm_intel_bo_get_subdata(bo, 0, 4*b->npixels, vaddr));
+ for (int i = 0; i < b->npixels; i++)
+ igt_assert_eq_u32(vaddr[i], val);
+}
+
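+/* Pick one pixel per row, sliding the column along with each pass. */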
+#define pixel(y, width) ((y)*(width) + (((y) + pass)%(width)))
+
+static void
+partial_set_bo(struct buffers *b, drm_intel_bo *bo, uint32_t val)
+{
+ for (int y = 0; y < b->height; y++)
+ do_or_die(drm_intel_bo_subdata(bo, 4*pixel(y, b->width), 4, &val));
+}
+
+static void
+partial_cmp_bo(struct buffers *b, drm_intel_bo *bo, uint32_t val)
+{
+ for (int y = 0; y < b->height; y++) {
+ uint32_t buf;
+ do_or_die(drm_intel_bo_get_subdata(bo, 4*pixel(y, b->width), 4, &buf));
+ igt_assert_eq_u32(buf, val);
+ }
+}
+
+static drm_intel_bo *
+create_normal_bo(drm_intel_bufmgr *bufmgr, uint64_t size)
+{
+ drm_intel_bo *bo;
+
+ bo = drm_intel_bo_alloc(bufmgr, "bo", size, 0);
+ igt_assert(bo);
+
+ return bo;
+}
+
+static void can_create_normal(const struct create *create, unsigned count)
+{
+}
+
+#if HAVE_CREATE_PRIVATE
+static drm_intel_bo *
+create_private_bo(drm_intel_bufmgr *bufmgr, uint64_t size)
+{
+ drm_intel_bo *bo;
+ uint32_t handle;
+
+ /* XXX gem_create_with_flags(fd, size, I915_CREATE_PRIVATE); */
+
+ handle = gem_create(fd, size);
+	bo = gem_handle_to_libdrm_bo(bufmgr, fd, "private", handle);
+ gem_close(fd, handle);
+
+ return bo;
+}
+
+static void can_create_private(const struct create *create, unsigned count)
+{
+ igt_require(0);
+}
+#endif
+
+#if HAVE_CREATE_STOLEN
+static drm_intel_bo *
+create_stolen_bo(drm_intel_bufmgr *bufmgr, uint64_t size)
+{
+ drm_intel_bo *bo;
+ uint32_t handle;
+
+ /* XXX gem_create_with_flags(fd, size, I915_CREATE_STOLEN); */
+
+ handle = gem_create(fd, size);
+ bo = gem_handle_to_libdrm_bo(bufmgr, fd, "stolen", handle);
+ gem_close(fd, handle);
+
+ return bo;
+}
+
+static void can_create_stolen(const struct create *create, unsigned count)
+{
+ /* XXX check num_buffers against available stolen */
+ igt_require(0);
+}
+#endif
+
+static void create_cpu_require(const struct create *create, unsigned count)
+{
+#if HAVE_CREATE_STOLEN
+ igt_require(create->create != create_stolen_bo);
+#endif
+}
+
+static drm_intel_bo *
+unmapped_create_bo(const struct buffers *b)
+{
+ return b->create->create(b->bufmgr, 4*b->npixels);
+}
+
+static void create_snoop_require(const struct create *create, unsigned count)
+{
+ create_cpu_require(create, count);
+ igt_require(!gem_has_llc(fd));
+}
+
+static drm_intel_bo *
+snoop_create_bo(const struct buffers *b)
+{
+ drm_intel_bo *bo;
+
+ bo = unmapped_create_bo(b);
+ gem_set_caching(fd, bo->handle, I915_CACHING_CACHED);
+ drm_intel_bo_disable_reuse(bo);
+
+ return bo;
+}
+
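+/*
+ * Probe for userptr support: an intentionally bad pointer returning EFAULT
+ * proves the ioctl exists, then retry with a real allocation.
+ */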
+static void create_userptr_require(const struct create *create, unsigned count)
+{
+ static int has_userptr = -1;
+ if (has_userptr < 0) {
+ struct drm_i915_gem_userptr arg;
+
+ has_userptr = 0;
+
+ memset(&arg, 0, sizeof(arg));
+ arg.user_ptr = -4096ULL;
+ arg.user_size = 8192;
+ errno = 0;
+ drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg);
+ if (errno == EFAULT) {
+ igt_assert(posix_memalign((void **)&arg.user_ptr,
+ 4096, arg.user_size) == 0);
+ has_userptr = drmIoctl(fd,
+ LOCAL_IOCTL_I915_GEM_USERPTR,
+ &arg) == 0;
+ free(from_user_pointer(arg.user_ptr));
+ }
+
+ }
+ igt_require(has_userptr);
+}
+
+static drm_intel_bo *
+userptr_create_bo(const struct buffers *b)
+{
+ struct local_i915_gem_userptr userptr;
+ drm_intel_bo *bo;
+ void *ptr;
+
+ memset(&userptr, 0, sizeof(userptr));
+ userptr.user_size = b->page_size;
+
+ ptr = mmap(NULL, userptr.user_size,
+ PROT_READ | PROT_WRITE, MAP_ANON | MAP_SHARED, -1, 0);
+ igt_assert(ptr != (void *)-1);
+ userptr.user_ptr = to_user_pointer(ptr);
+
+#if 0
+ do_or_die(drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr));
+ bo = gem_handle_to_libdrm_bo(b->bufmgr, fd, "userptr", userptr.handle);
+ gem_close(fd, userptr.handle);
+#else
+ bo = drm_intel_bo_alloc_userptr(b->bufmgr, "name",
+ ptr, I915_TILING_NONE, 0,
+ userptr.user_size, 0);
+ igt_assert(bo);
+#endif
+ bo->virtual = from_user_pointer(userptr.user_ptr);
+
+ return bo;
+}
+
+static void
+userptr_set_bo(struct buffers *b, drm_intel_bo *bo, uint32_t val)
+{
+ int size = b->npixels;
+ uint32_t *vaddr = bo->virtual;
+
+ gem_set_domain(fd, bo->handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ while (size--)
+ *vaddr++ = val;
+}
+
+static void
+userptr_cmp_bo(struct buffers *b, drm_intel_bo *bo, uint32_t val)
+{
+ int size = b->npixels;
+ uint32_t *vaddr = bo->virtual;
+
+ gem_set_domain(fd, bo->handle,
+ I915_GEM_DOMAIN_CPU, 0);
+ while (size--)
+ igt_assert_eq_u32(*vaddr++, val);
+}
+
+static void
+userptr_release_bo(drm_intel_bo *bo)
+{
+ igt_assert(bo->virtual);
+
+ munmap(bo->virtual, bo->size);
+ bo->virtual = NULL;
+
+ drm_intel_bo_unreference(bo);
+}
+
+static void create_dmabuf_require(const struct create *create, unsigned count)
+{
+ static int has_dmabuf = -1;
+ if (has_dmabuf < 0) {
+ struct drm_prime_handle args;
+ void *ptr;
+
+ memset(&args, 0, sizeof(args));
+ args.handle = gem_create(fd, 4096);
+ args.flags = DRM_RDWR;
+ args.fd = -1;
+
+ drmIoctl(fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
+ gem_close(fd, args.handle);
+
+ has_dmabuf = 0;
+ ptr = mmap(NULL, 4096, PROT_READ, MAP_SHARED, args.fd, 0);
+ if (ptr != MAP_FAILED) {
+ has_dmabuf = 1;
+ munmap(ptr, 4096);
+ }
+
+ close(args.fd);
+ }
+ igt_require(has_dmabuf);
+ intel_require_files(2*count);
+}
+
+struct dmabuf {
+ int fd;
+ void *map;
+};
+
+static drm_intel_bo *
+dmabuf_create_bo(const struct buffers *b)
+{
+ struct drm_prime_handle args;
+ drm_intel_bo *bo;
+ struct dmabuf *dmabuf;
+ int size;
+
+ size = b->page_size;
+
+ memset(&args, 0, sizeof(args));
+ args.handle = gem_create(fd, size);
+ args.flags = DRM_RDWR;
+ args.fd = -1;
+
+ do_ioctl(fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
+ gem_close(fd, args.handle);
+
+ bo = drm_intel_bo_gem_create_from_prime(b->bufmgr, args.fd, size);
+ igt_assert(bo);
+
+ dmabuf = malloc(sizeof(*dmabuf));
+ igt_assert(dmabuf);
+
+ dmabuf->fd = args.fd;
+ dmabuf->map = mmap(NULL, size,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ dmabuf->fd, 0);
+ igt_assert(dmabuf->map != (void *)-1);
+
+ bo->virtual = dmabuf;
+
+ return bo;
+}
+
+static void
+dmabuf_set_bo(struct buffers *b, drm_intel_bo *bo, uint32_t val)
+{
+ struct dmabuf *dmabuf = bo->virtual;
+ uint32_t *v = dmabuf->map;
+ int y;
+
+ prime_sync_start(dmabuf->fd, true);
+ for (y = 0; y < b->height; y++)
+ v[pixel(y, b->width)] = val;
+ prime_sync_end(dmabuf->fd, true);
+}
+
+static void
+dmabuf_cmp_bo(struct buffers *b, drm_intel_bo *bo, uint32_t val)
+{
+ struct dmabuf *dmabuf = bo->virtual;
+ uint32_t *v = dmabuf->map;
+ int y;
+
+ prime_sync_start(dmabuf->fd, false);
+ for (y = 0; y < b->height; y++)
+ igt_assert_eq_u32(v[pixel(y, b->width)], val);
+ prime_sync_end(dmabuf->fd, false);
+}
+
+static void
+dmabuf_release_bo(drm_intel_bo *bo)
+{
+ struct dmabuf *dmabuf = bo->virtual;
+ igt_assert(dmabuf);
+
+ munmap(dmabuf->map, bo->size);
+ close(dmabuf->fd);
+ free(dmabuf);
+
+ bo->virtual = NULL;
+ drm_intel_bo_unreference(bo);
+}
+
+static bool has_prime_export(int _fd)
+{
+ uint64_t value;
+
+ if (drmGetCap(_fd, DRM_CAP_PRIME, &value))
+ return false;
+
+ return value & DRM_PRIME_CAP_EXPORT;
+}
+
+static void create_vgem_require(const struct create *create, unsigned count)
+{
+ igt_require(vgem_drv != -1);
+ igt_require(has_prime_export(vgem_drv));
+ create_dmabuf_require(create, count);
+}
+
+static drm_intel_bo *
+vgem_create_bo(const struct buffers *b)
+{
+ struct drm_prime_handle args;
+ drm_intel_bo *bo;
+ struct vgem_bo vgem;
+ struct dmabuf *dmabuf;
+
+ igt_assert(vgem_drv != -1);
+
+ vgem.width = b->width;
+ vgem.height = b->height;
+ vgem.bpp = 32;
+ vgem_create(vgem_drv, &vgem);
+
+ memset(&args, 0, sizeof(args));
+ args.handle = vgem.handle;
+ args.flags = DRM_RDWR;
+ args.fd = -1;
+
+ do_ioctl(vgem_drv, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
+ gem_close(vgem_drv, args.handle);
+ igt_assert(args.fd != -1);
+
+ bo = drm_intel_bo_gem_create_from_prime(b->bufmgr, args.fd, vgem.size);
+ igt_assert(bo);
+
+ dmabuf = malloc(sizeof(*dmabuf));
+ igt_assert(dmabuf);
+
+ dmabuf->fd = args.fd;
+ dmabuf->map = mmap(NULL, vgem.size,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ dmabuf->fd, 0);
+ igt_assert(dmabuf->map != (void *)-1);
+
+ bo->virtual = dmabuf;
+
+ return bo;
+}
+
+static void
+gtt_set_bo(struct buffers *b, drm_intel_bo *bo, uint32_t val)
+{
+ uint32_t *vaddr = bo->virtual;
+
+ drm_intel_gem_bo_start_gtt_access(bo, true);
+ for (int y = 0; y < b->height; y++)
+ vaddr[pixel(y, b->width)] = val;
+}
+
+static void
+gtt_cmp_bo(struct buffers *b, drm_intel_bo *bo, uint32_t val)
+{
+ uint32_t *vaddr = bo->virtual;
+
+ /* GTT access is slow. So we just compare a few points */
+ drm_intel_gem_bo_start_gtt_access(bo, false);
+ for (int y = 0; y < b->height; y++)
+ igt_assert_eq_u32(vaddr[pixel(y, b->width)], val);
+}
+
+static drm_intel_bo *
+map_bo(drm_intel_bo *bo)
+{
+	/* gtt map doesn't have a write parameter, so just keep the mapping
+	 * around (to avoid the set_domain with the gtt write domain set) and
+	 * manually tell the kernel when we start accessing the gtt. */
+ do_or_die(drm_intel_gem_bo_map_gtt(bo));
+
+ return bo;
+}
+
+static drm_intel_bo *
+tile_bo(drm_intel_bo *bo, int width)
+{
+ uint32_t tiling = I915_TILING_X;
+ uint32_t stride = width * 4;
+
+ do_or_die(drm_intel_bo_set_tiling(bo, &tiling, stride));
+
+ return bo;
+}
+
+static drm_intel_bo *
+gtt_create_bo(const struct buffers *b)
+{
+ return map_bo(unmapped_create_bo(b));
+}
+
+static drm_intel_bo *
+gttX_create_bo(const struct buffers *b)
+{
+ return tile_bo(gtt_create_bo(b), b->width);
+}
+
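+/*
+ * CPU access cannot replicate the old bit-17 swizzling, so skip if the
+ * physical swizzle differs from the one reported to userspace.
+ */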
+static void bit17_require(void)
+{
+ static struct drm_i915_gem_get_tiling2 {
+ uint32_t handle;
+ uint32_t tiling_mode;
+ uint32_t swizzle_mode;
+ uint32_t phys_swizzle_mode;
+ } arg;
+#define DRM_IOCTL_I915_GEM_GET_TILING2 DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling2)
+
+ if (arg.handle == 0) {
+ arg.handle = gem_create(fd, 4096);
+ gem_set_tiling(fd, arg.handle, I915_TILING_X, 512);
+
+ do_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING2, &arg);
+ gem_close(fd, arg.handle);
+ }
+ igt_require(arg.phys_swizzle_mode == arg.swizzle_mode);
+}
+
+static void wc_require(void)
+{
+ bit17_require();
+ gem_require_mmap_wc(fd);
+}
+
+static void
+wc_create_require(const struct create *create, unsigned count)
+{
+ wc_require();
+}
+
+static drm_intel_bo *
+wc_create_bo(const struct buffers *b)
+{
+ drm_intel_bo *bo;
+
+ bo = unmapped_create_bo(b);
+ bo->virtual = gem_mmap__wc(fd, bo->handle, 0, bo->size, PROT_READ | PROT_WRITE);
+ return bo;
+}
+
+static void
+wc_release_bo(drm_intel_bo *bo)
+{
+ igt_assert(bo->virtual);
+
+ munmap(bo->virtual, bo->size);
+ bo->virtual = NULL;
+
+ nop_release_bo(bo);
+}
+
+static drm_intel_bo *
+gpu_create_bo(const struct buffers *b)
+{
+ return unmapped_create_bo(b);
+}
+
+static drm_intel_bo *
+gpuX_create_bo(const struct buffers *b)
+{
+ return tile_bo(gpu_create_bo(b), b->width);
+}
+
+static void
+cpu_set_bo(struct buffers *b, drm_intel_bo *bo, uint32_t val)
+{
+ int size = b->npixels;
+ uint32_t *vaddr;
+
+ do_or_die(drm_intel_bo_map(bo, true));
+ vaddr = bo->virtual;
+ while (size--)
+ *vaddr++ = val;
+ drm_intel_bo_unmap(bo);
+}
+
+static void
+cpu_cmp_bo(struct buffers *b, drm_intel_bo *bo, uint32_t val)
+{
+ int size = b->npixels;
+ uint32_t *vaddr;
+
+ do_or_die(drm_intel_bo_map(bo, false));
+ vaddr = bo->virtual;
+ while (size--)
+ igt_assert_eq_u32(*vaddr++, val);
+ drm_intel_bo_unmap(bo);
+}
+
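+/* Fill the bo on the GPU with a hand-built XY_COLOR_BLT batch. */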
+static void
+gpu_set_bo(struct buffers *buffers, drm_intel_bo *bo, uint32_t val)
+{
+ struct drm_i915_gem_relocation_entry reloc[1];
+ struct drm_i915_gem_exec_object2 gem_exec[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t buf[10], *b;
+ uint32_t tiling, swizzle;
+
+ drm_intel_bo_get_tiling(bo, &tiling, &swizzle);
+
+ memset(reloc, 0, sizeof(reloc));
+ memset(gem_exec, 0, sizeof(gem_exec));
+ memset(&execbuf, 0, sizeof(execbuf));
+
+ b = buf;
+ *b++ = XY_COLOR_BLT_CMD_NOLEN |
+ ((gen >= 8) ? 5 : 4) |
+ COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
+ if (gen >= 4 && tiling) {
+ b[-1] |= XY_COLOR_BLT_TILED;
+ *b = buffers->width;
+ } else
+ *b = buffers->width << 2;
+ *b++ |= 0xf0 << 16 | 1 << 25 | 1 << 24;
+ *b++ = 0;
+ *b++ = buffers->height << 16 | buffers->width;
+ reloc[0].offset = (b - buf) * sizeof(uint32_t);
+ reloc[0].target_handle = bo->handle;
+ reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+ *b++ = 0;
+ if (gen >= 8)
+ *b++ = 0;
+ *b++ = val;
+ *b++ = MI_BATCH_BUFFER_END;
+ if ((b - buf) & 1)
+ *b++ = 0;
+
+ gem_exec[0].handle = bo->handle;
+ gem_exec[0].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+ gem_exec[1].handle = gem_create(fd, 4096);
+ gem_exec[1].relocation_count = 1;
+ gem_exec[1].relocs_ptr = to_user_pointer(reloc);
+
+ execbuf.buffers_ptr = to_user_pointer(gem_exec);
+ execbuf.buffer_count = 2;
+ execbuf.batch_len = (b - buf) * sizeof(buf[0]);
+ if (gen >= 6)
+ execbuf.flags = I915_EXEC_BLT;
+
+ gem_write(fd, gem_exec[1].handle, 0, buf, execbuf.batch_len);
+ gem_execbuf(fd, &execbuf);
+
+ gem_close(fd, gem_exec[1].handle);
+}
+
+static void
+gpu_cmp_bo(struct buffers *b, drm_intel_bo *bo, uint32_t val)
+{
+ blt_copy_bo(b, b->snoop, bo);
+ cpu_cmp_bo(b, b->snoop, val);
+}
+
+struct access_mode {
+ const char *name;
+ void (*require)(const struct create *, unsigned);
+ drm_intel_bo *(*create_bo)(const struct buffers *b);
+ void (*set_bo)(struct buffers *b, drm_intel_bo *bo, uint32_t val);
+ void (*cmp_bo)(struct buffers *b, drm_intel_bo *bo, uint32_t val);
+ void (*release_bo)(drm_intel_bo *bo);
+};
+igt_render_copyfunc_t rendercopy;
+
+static int read_sysctl(const char *path)
+{
+ FILE *file = fopen(path, "r");
+ int max = 0;
+ if (file) {
+ if (fscanf(file, "%d", &max) != 1)
+ max = 0; /* silence! */
+ fclose(file);
+ }
+ return max;
+}
+
+static int write_sysctl(const char *path, int value)
+{
+ FILE *file = fopen(path, "w");
+ if (file) {
+ fprintf(file, "%d", value);
+ fclose(file);
+ }
+ return read_sysctl(path);
+}
+
+static bool set_max_map_count(int num_buffers)
+{
+ int max = read_sysctl("/proc/sys/vm/max_map_count");
+ if (max < num_buffers + 1024)
+ max = write_sysctl("/proc/sys/vm/max_map_count",
+ num_buffers + 1024);
+ return max > num_buffers;
+}
+
+static void buffers_init(struct buffers *b,
+ const char *name,
+ const struct create *create,
+ const struct access_mode *mode,
+ const struct size *size,
+ int num_buffers,
+ int _fd, int enable_reuse)
+{
+ memset(b, 0, sizeof(*b));
+ b->name = name;
+ b->create = create;
+ b->mode = mode;
+ b->size = size;
+ b->num_buffers = num_buffers;
+ b->count = 0;
+
+ b->width = size->width;
+ b->height = size->height;
+ b->npixels = size->width * size->height;
+ b->page_size = 4*b->npixels;
+ b->page_size = (b->page_size + 4095) & -4096;
+ b->tmp = malloc(b->page_size);
+ igt_assert(b->tmp);
+
+ b->bufmgr = drm_intel_bufmgr_gem_init(_fd, 4096);
+ igt_assert(b->bufmgr);
+
+ b->src = malloc(2*sizeof(drm_intel_bo *)*num_buffers);
+ igt_assert(b->src);
+ b->dst = b->src + num_buffers;
+
+ if (enable_reuse)
+ drm_intel_bufmgr_gem_enable_reuse(b->bufmgr);
+ b->batch = intel_batchbuffer_alloc(b->bufmgr, devid);
+ igt_assert(b->batch);
+}
+
+static void buffers_destroy(struct buffers *b)
+{
+ int count = b->count;
+ if (count == 0)
+ return;
+
+ /* Be safe so that we can clean up a partial creation */
+ b->count = 0;
+ for (int i = 0; i < count; i++) {
+ if (b->src[i]) {
+ b->mode->release_bo(b->src[i]);
+ b->src[i] = NULL;
+ } else
+ break;
+
+ if (b->dst[i]) {
+ b->mode->release_bo(b->dst[i]);
+ b->dst[i] = NULL;
+ }
+ }
+ if (b->snoop) {
+ nop_release_bo(b->snoop);
+ b->snoop = NULL;
+ }
+ if (b->spare) {
+ b->mode->release_bo(b->spare);
+ b->spare = NULL;
+ }
+}
+
+static void buffers_create(struct buffers *b)
+{
+ int count = b->num_buffers;
+ igt_assert(b->bufmgr);
+
+ buffers_destroy(b);
+ igt_assert(b->count == 0);
+ b->count = count;
+
+ for (int i = 0; i < count; i++) {
+ b->src[i] = b->mode->create_bo(b);
+ b->dst[i] = b->mode->create_bo(b);
+ }
+ b->spare = b->mode->create_bo(b);
+ b->snoop = snoop_create_bo(b);
+}
+
+static void buffers_reset(struct buffers *b, bool enable_reuse)
+{
+ b->bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ igt_assert(b->bufmgr);
+
+ if (enable_reuse)
+ drm_intel_bufmgr_gem_enable_reuse(b->bufmgr);
+ b->batch = intel_batchbuffer_alloc(b->bufmgr, devid);
+ igt_assert(b->batch);
+}
+
+static void buffers_fini(struct buffers *b)
+{
+ if (b->bufmgr == NULL)
+ return;
+
+ buffers_destroy(b);
+
+ free(b->tmp);
+ free(b->src);
+
+ intel_batchbuffer_free(b->batch);
+ drm_intel_bufmgr_destroy(b->bufmgr);
+
+ memset(b, 0, sizeof(*b));
+}
+
+typedef void (*do_copy)(struct buffers *b, drm_intel_bo *dst, drm_intel_bo *src);
+typedef igt_hang_t (*do_hang)(void);
+
+static void render_copy_bo(struct buffers *b, drm_intel_bo *dst, drm_intel_bo *src)
+{
+ struct igt_buf d = {
+ .bo = dst,
+ .size = b->npixels * 4,
+ .num_tiles = b->npixels * 4,
+ .stride = b->width * 4,
+ }, s = {
+ .bo = src,
+ .size = b->npixels * 4,
+ .num_tiles = b->npixels * 4,
+ .stride = b->width * 4,
+ };
+ uint32_t swizzle;
+
+ drm_intel_bo_get_tiling(dst, &d.tiling, &swizzle);
+ drm_intel_bo_get_tiling(src, &s.tiling, &swizzle);
+
+ rendercopy(b->batch, NULL,
+ &s, 0, 0,
+ b->width, b->height,
+ &d, 0, 0);
+}
+
+static void blt_copy_bo(struct buffers *b, drm_intel_bo *dst, drm_intel_bo *src)
+{
+ intel_blt_copy(b->batch,
+ src, 0, 0, 4*b->width,
+ dst, 0, 0, 4*b->width,
+ b->width, b->height, 32);
+}
+
+static void cpu_copy_bo(struct buffers *b, drm_intel_bo *dst, drm_intel_bo *src)
+{
+ const int size = b->page_size;
+ void *d, *s;
+
+ gem_set_domain(fd, src->handle, I915_GEM_DOMAIN_CPU, 0);
+ gem_set_domain(fd, dst->handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ s = gem_mmap__cpu(fd, src->handle, 0, size, PROT_READ);
+ d = gem_mmap__cpu(fd, dst->handle, 0, size, PROT_WRITE);
+
+ memcpy(d, s, size);
+
+ munmap(d, size);
+ munmap(s, size);
+}
+
+static void gtt_copy_bo(struct buffers *b, drm_intel_bo *dst, drm_intel_bo *src)
+{
+ const int size = b->page_size;
+ void *d, *s;
+
+ gem_set_domain(fd, src->handle, I915_GEM_DOMAIN_GTT, 0);
+ gem_set_domain(fd, dst->handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+ s = gem_mmap__gtt(fd, src->handle, size, PROT_READ);
+ d = gem_mmap__gtt(fd, dst->handle, size, PROT_WRITE);
+
+ memcpy(d, s, size);
+
+ munmap(d, size);
+ munmap(s, size);
+}
+
+static void wc_copy_bo(struct buffers *b, drm_intel_bo *dst, drm_intel_bo *src)
+{
+ const int size = b->page_size;
+ void *d, *s;
+
+ gem_set_domain(fd, src->handle, I915_GEM_DOMAIN_WC, 0);
+ gem_set_domain(fd, dst->handle, I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+
+ s = gem_mmap__wc(fd, src->handle, 0, size, PROT_READ);
+ d = gem_mmap__wc(fd, dst->handle, 0, size, PROT_WRITE);
+
+ memcpy(d, s, size);
+
+ munmap(d, size);
+ munmap(s, size);
+}
+
+static igt_hang_t no_hang(void)
+{
+ return (igt_hang_t){0, 0};
+}
+
+static igt_hang_t bcs_hang(void)
+{
+ return igt_hang_ring(fd, I915_EXEC_BLT);
+}
+
+static igt_hang_t rcs_hang(void)
+{
+ return igt_hang_ring(fd, I915_EXEC_RENDER);
+}
+
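+/*
+ * Inject a hang on the render ring, then resubmit the same spinning batch
+ * on every other physical engine so that all rings are blocked.
+ */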
+static igt_hang_t all_hang(void)
+{
+ igt_hang_t hang = igt_hang_ring(fd, I915_EXEC_RENDER);
+ unsigned engine;
+
+ for_each_physical_engine(fd, engine) {
+ struct drm_i915_gem_execbuffer2 eb = hang.spin->execbuf;
+
+ if (engine == I915_EXEC_RENDER)
+ continue;
+
+ eb.flags = engine;
+ __gem_execbuf(fd, &eb);
+ }
+
+ return hang;
+}
+
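+/* Copy a single source into every destination in turn, checking the result
+ * after each copy. */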
+static void do_basic0(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ gem_quiescent_gpu(fd);
+
+ buffers->mode->set_bo(buffers, buffers->src[0], 0xdeadbeef);
+ for (int i = 0; i < buffers->count; i++) {
+ igt_hang_t hang = do_hang_func();
+
+ do_copy_func(buffers, buffers->dst[i], buffers->src[0]);
+ buffers->mode->cmp_bo(buffers, buffers->dst[i], 0xdeadbeef);
+
+ igt_post_hang_ring(fd, hang);
+ }
+}
+
+static void do_basic1(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ gem_quiescent_gpu(fd);
+
+ for (int i = 0; i < buffers->count; i++) {
+ igt_hang_t hang = do_hang_func();
+
+ buffers->mode->set_bo(buffers, buffers->src[i], i);
+ buffers->mode->set_bo(buffers, buffers->dst[i], ~i);
+
+ do_copy_func(buffers, buffers->dst[i], buffers->src[i]);
+ usleep(0); /* let someone else claim the mutex */
+ buffers->mode->cmp_bo(buffers, buffers->dst[i], i);
+
+ igt_post_hang_ring(fd, hang);
+ }
+}
+
+static void do_basicN(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ igt_hang_t hang;
+
+ gem_quiescent_gpu(fd);
+
+ for (int i = 0; i < buffers->count; i++) {
+ buffers->mode->set_bo(buffers, buffers->src[i], i);
+ buffers->mode->set_bo(buffers, buffers->dst[i], ~i);
+ }
+
+ hang = do_hang_func();
+
+ for (int i = 0; i < buffers->count; i++) {
+ do_copy_func(buffers, buffers->dst[i], buffers->src[i]);
+ usleep(0); /* let someone else claim the mutex */
+ }
+
+ for (int i = 0; i < buffers->count; i++)
+ buffers->mode->cmp_bo(buffers, buffers->dst[i], i);
+
+ igt_post_hang_ring(fd, hang);
+}
+
+static void do_overwrite_source(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ igt_hang_t hang;
+ int i;
+
+ gem_quiescent_gpu(fd);
+ for (i = 0; i < buffers->count; i++) {
+ buffers->mode->set_bo(buffers, buffers->src[i], i);
+ buffers->mode->set_bo(buffers, buffers->dst[i], ~i);
+ }
+ for (i = 0; i < buffers->count; i++)
+ do_copy_func(buffers, buffers->dst[i], buffers->src[i]);
+ hang = do_hang_func();
+ for (i = buffers->count; i--; )
+ buffers->mode->set_bo(buffers, buffers->src[i], 0xdeadbeef);
+ for (i = 0; i < buffers->count; i++)
+ buffers->mode->cmp_bo(buffers, buffers->dst[i], i);
+ igt_post_hang_ring(fd, hang);
+}
+
+static void do_overwrite_source_read(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func,
+ int do_rcs)
+{
+ const int half = buffers->count/2;
+ igt_hang_t hang;
+ int i;
+
+ gem_quiescent_gpu(fd);
+ for (i = 0; i < half; i++) {
+ buffers->mode->set_bo(buffers, buffers->src[i], i);
+ buffers->mode->set_bo(buffers, buffers->dst[i], ~i);
+ buffers->mode->set_bo(buffers, buffers->dst[i+half], ~i);
+ }
+ for (i = 0; i < half; i++) {
+ do_copy_func(buffers, buffers->dst[i], buffers->src[i]);
+ if (do_rcs)
+ render_copy_bo(buffers, buffers->dst[i+half], buffers->src[i]);
+ else
+ blt_copy_bo(buffers, buffers->dst[i+half], buffers->src[i]);
+ }
+ hang = do_hang_func();
+ for (i = half; i--; )
+ buffers->mode->set_bo(buffers, buffers->src[i], 0xdeadbeef);
+ for (i = 0; i < half; i++) {
+ buffers->mode->cmp_bo(buffers, buffers->dst[i], i);
+ buffers->mode->cmp_bo(buffers, buffers->dst[i+half], i);
+ }
+ igt_post_hang_ring(fd, hang);
+}
+
+static void do_overwrite_source_read_bcs(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ do_overwrite_source_read(buffers, do_copy_func, do_hang_func, 0);
+}
+
+static void do_overwrite_source_read_rcs(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ do_overwrite_source_read(buffers, do_copy_func, do_hang_func, 1);
+}
+
+static void do_overwrite_source__rev(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ igt_hang_t hang;
+ int i;
+
+ gem_quiescent_gpu(fd);
+ for (i = 0; i < buffers->count; i++) {
+ buffers->mode->set_bo(buffers, buffers->src[i], i);
+ buffers->mode->set_bo(buffers, buffers->dst[i], ~i);
+ }
+ for (i = 0; i < buffers->count; i++)
+ do_copy_func(buffers, buffers->dst[i], buffers->src[i]);
+ hang = do_hang_func();
+ for (i = 0; i < buffers->count; i++)
+ buffers->mode->set_bo(buffers, buffers->src[i], 0xdeadbeef);
+ for (i = buffers->count; i--; )
+ buffers->mode->cmp_bo(buffers, buffers->dst[i], i);
+ igt_post_hang_ring(fd, hang);
+}
+
+static void do_overwrite_source__one(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ igt_hang_t hang;
+
+ gem_quiescent_gpu(fd);
+ buffers->mode->set_bo(buffers, buffers->src[0], 0);
+ buffers->mode->set_bo(buffers, buffers->dst[0], ~0);
+ do_copy_func(buffers, buffers->dst[0], buffers->src[0]);
+ hang = do_hang_func();
+ buffers->mode->set_bo(buffers, buffers->src[0], 0xdeadbeef);
+ buffers->mode->cmp_bo(buffers, buffers->dst[0], 0);
+ igt_post_hang_ring(fd, hang);
+}
+
+static void do_intermix(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func,
+ int do_rcs)
+{
+ const int half = buffers->count/2;
+ igt_hang_t hang;
+ int i;
+
+ gem_quiescent_gpu(fd);
+ for (i = 0; i < buffers->count; i++) {
+ buffers->mode->set_bo(buffers, buffers->src[i], 0xdeadbeef^~i);
+ buffers->mode->set_bo(buffers, buffers->dst[i], i);
+ }
+ for (i = 0; i < half; i++) {
+ if (do_rcs == 1 || (do_rcs == -1 && i & 1))
+ render_copy_bo(buffers, buffers->dst[i], buffers->src[i]);
+ else
+ blt_copy_bo(buffers, buffers->dst[i], buffers->src[i]);
+
+ do_copy_func(buffers, buffers->dst[i+half], buffers->src[i]);
+
+ if (do_rcs == 1 || (do_rcs == -1 && (i & 1) == 0))
+ render_copy_bo(buffers, buffers->dst[i], buffers->dst[i+half]);
+ else
+ blt_copy_bo(buffers, buffers->dst[i], buffers->dst[i+half]);
+
+ do_copy_func(buffers, buffers->dst[i+half], buffers->src[i+half]);
+ }
+ hang = do_hang_func();
+ for (i = 0; i < 2*half; i++)
+ buffers->mode->cmp_bo(buffers, buffers->dst[i], 0xdeadbeef^~i);
+ igt_post_hang_ring(fd, hang);
+}
+
+static void do_intermix_rcs(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ do_intermix(buffers, do_copy_func, do_hang_func, 1);
+}
+
+static void do_intermix_bcs(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ do_intermix(buffers, do_copy_func, do_hang_func, 0);
+}
+
+static void do_intermix_both(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ do_intermix(buffers, do_copy_func, do_hang_func, -1);
+}
+
+static void do_early_read(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ igt_hang_t hang;
+ int i;
+
+ gem_quiescent_gpu(fd);
+ for (i = buffers->count; i--; )
+ buffers->mode->set_bo(buffers, buffers->src[i], 0xdeadbeef);
+ for (i = 0; i < buffers->count; i++)
+ do_copy_func(buffers, buffers->dst[i], buffers->src[i]);
+ hang = do_hang_func();
+ for (i = buffers->count; i--; )
+ buffers->mode->cmp_bo(buffers, buffers->dst[i], 0xdeadbeef);
+ igt_post_hang_ring(fd, hang);
+}
+
+static void do_read_read_bcs(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ igt_hang_t hang;
+ int i;
+
+ gem_quiescent_gpu(fd);
+ for (i = buffers->count; i--; )
+ buffers->mode->set_bo(buffers, buffers->src[i], 0xdeadbeef ^ i);
+ for (i = 0; i < buffers->count; i++) {
+ do_copy_func(buffers, buffers->dst[i], buffers->src[i]);
+ blt_copy_bo(buffers, buffers->spare, buffers->src[i]);
+ }
+ buffers->mode->cmp_bo(buffers, buffers->spare, 0xdeadbeef^(buffers->count-1));
+ hang = do_hang_func();
+ for (i = buffers->count; i--; )
+ buffers->mode->cmp_bo(buffers, buffers->dst[i], 0xdeadbeef ^ i);
+ igt_post_hang_ring(fd, hang);
+}
+
+static void do_write_read_bcs(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ igt_hang_t hang;
+ int i;
+
+ gem_quiescent_gpu(fd);
+ for (i = buffers->count; i--; )
+ buffers->mode->set_bo(buffers, buffers->src[i], 0xdeadbeef ^ i);
+ for (i = 0; i < buffers->count; i++) {
+ blt_copy_bo(buffers, buffers->spare, buffers->src[i]);
+ do_copy_func(buffers, buffers->dst[i], buffers->spare);
+ }
+ hang = do_hang_func();
+ for (i = buffers->count; i--; )
+ buffers->mode->cmp_bo(buffers, buffers->dst[i], 0xdeadbeef ^ i);
+ igt_post_hang_ring(fd, hang);
+}
+
+static void do_read_read_rcs(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ igt_hang_t hang;
+ int i;
+
+ gem_quiescent_gpu(fd);
+ for (i = buffers->count; i--; )
+ buffers->mode->set_bo(buffers, buffers->src[i], 0xdeadbeef ^ i);
+ for (i = 0; i < buffers->count; i++) {
+ do_copy_func(buffers, buffers->dst[i], buffers->src[i]);
+ render_copy_bo(buffers, buffers->spare, buffers->src[i]);
+ }
+ buffers->mode->cmp_bo(buffers, buffers->spare, 0xdeadbeef^(buffers->count-1));
+ hang = do_hang_func();
+ for (i = buffers->count; i--; )
+ buffers->mode->cmp_bo(buffers, buffers->dst[i], 0xdeadbeef ^ i);
+ igt_post_hang_ring(fd, hang);
+}
+
+static void do_write_read_rcs(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ igt_hang_t hang;
+ int i;
+
+ gem_quiescent_gpu(fd);
+ for (i = buffers->count; i--; )
+ buffers->mode->set_bo(buffers, buffers->src[i], 0xdeadbeef ^ i);
+ for (i = 0; i < buffers->count; i++) {
+ render_copy_bo(buffers, buffers->spare, buffers->src[i]);
+ do_copy_func(buffers, buffers->dst[i], buffers->spare);
+ }
+ hang = do_hang_func();
+ for (i = buffers->count; i--; )
+ buffers->mode->cmp_bo(buffers, buffers->dst[i], 0xdeadbeef ^ i);
+ igt_post_hang_ring(fd, hang);
+}
+
+static void do_gpu_read_after_write(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ igt_hang_t hang;
+ int i;
+
+ gem_quiescent_gpu(fd);
+ for (i = buffers->count; i--; )
+ buffers->mode->set_bo(buffers, buffers->src[i], 0xabcdabcd);
+ for (i = 0; i < buffers->count; i++)
+ do_copy_func(buffers, buffers->dst[i], buffers->src[i]);
+ for (i = buffers->count; i--; )
+ do_copy_func(buffers, buffers->spare, buffers->dst[i]);
+ hang = do_hang_func();
+ for (i = buffers->count; i--; )
+ buffers->mode->cmp_bo(buffers, buffers->dst[i], 0xabcdabcd);
+ igt_post_hang_ring(fd, hang);
+}
+
+typedef void (*do_test)(struct buffers *buffers,
+ do_copy do_copy_func,
+ do_hang do_hang_func);
+
+typedef void (*run_wrap)(struct buffers *buffers,
+ do_test do_test_func,
+ do_copy do_copy_func,
+ do_hang do_hang_func);
+
+static void run_single(struct buffers *buffers,
+ do_test do_test_func,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ pass = 0;
+ do_test_func(buffers, do_copy_func, do_hang_func);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+static void run_interruptible(struct buffers *buffers,
+ do_test do_test_func,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ pass = 0;
+ igt_while_interruptible(true)
+ do_test_func(buffers, do_copy_func, do_hang_func);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+static void run_child(struct buffers *buffers,
+ do_test do_test_func,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+
+{
+	/* We inherit the buffers from the parent, but the bufmgr/batch
+	 * needs to be local as the cache of reusable buffers itself will be
+	 * COWed, leading to the child closing an object without the parent
+	 * knowing.
+	 */
+ pass = 0;
+ igt_fork(child, 1)
+ do_test_func(buffers, do_copy_func, do_hang_func);
+ igt_waitchildren();
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+static void __run_forked(struct buffers *buffers,
+ int num_children, int loops, bool interrupt,
+ do_test do_test_func,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+
+{
+	/* purge the libdrm caches before cloning the process */
+ buffers_destroy(buffers);
+ intel_batchbuffer_free(buffers->batch);
+ drm_intel_bufmgr_destroy(buffers->bufmgr);
+
+ igt_fork(child, num_children) {
+ int num_buffers;
+
+ /* recreate process local variables */
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ num_buffers = buffers->num_buffers / num_children;
+ num_buffers += MIN_BUFFERS;
+ if (num_buffers < buffers->num_buffers)
+ buffers->num_buffers = num_buffers;
+
+ buffers_reset(buffers, true);
+ buffers_create(buffers);
+
+ igt_while_interruptible(interrupt) {
+ for (pass = 0; pass < loops; pass++)
+ do_test_func(buffers,
+ do_copy_func,
+ do_hang_func);
+ }
+ }
+ igt_waitchildren();
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+
+ buffers_reset(buffers, true);
+}
+
+static void run_forked(struct buffers *buffers,
+ do_test do_test_func,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ __run_forked(buffers, ncpus, ncpus, false,
+ do_test_func, do_copy_func, do_hang_func);
+}
+
+static void run_bomb(struct buffers *buffers,
+ do_test do_test_func,
+ do_copy do_copy_func,
+ do_hang do_hang_func)
+{
+ const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ __run_forked(buffers, 8*ncpus, 2, true,
+ do_test_func, do_copy_func, do_hang_func);
+}
+
+static void cpu_require(void)
+{
+ bit17_require();
+}
+
+static void gtt_require(void)
+{
+}
+
+static void bcs_require(void)
+{
+}
+
+static void rcs_require(void)
+{
+ igt_require(rendercopy);
+}
+
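+/*
+ * Enumerate the subtest matrix for one buffer type: every copy pipeline
+ * crossed with every hang mode, wrapped by the given execution strategy.
+ */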
+static void
+run_mode(const char *prefix,
+ const struct create *create,
+ const struct access_mode *mode,
+ const struct size *size,
+ const int num_buffers,
+ const char *suffix,
+ run_wrap run_wrap_func)
+{
+ const struct {
+ const char *prefix;
+ do_copy copy;
+ void (*require)(void);
+ } pipelines[] = {
+ { "cpu", cpu_copy_bo, cpu_require },
+ { "gtt", gtt_copy_bo, gtt_require },
+ { "wc", wc_copy_bo, wc_require },
+ { "blt", blt_copy_bo, bcs_require },
+ { "render", render_copy_bo, rcs_require },
+ { NULL, NULL }
+ }, *pskip = pipelines + 3, *p;
+ const struct {
+ const char *suffix;
+ do_hang hang;
+ } hangs[] = {
+ { "", no_hang },
+ { "-hang-blt", bcs_hang },
+ { "-hang-render", rcs_hang },
+ { "-hang-all", all_hang },
+ { NULL, NULL },
+ }, *h;
+ struct buffers buffers;
+
+ igt_fixture
+ buffers_init(&buffers, prefix, create, mode,
+ size, num_buffers,
+ fd, run_wrap_func != run_child);
+
+ for (h = hangs; h->suffix; h++) {
+ if (!all && *h->suffix)
+ continue;
+
+ if (!*h->suffix)
+ igt_fixture
+ igt_fork_hang_detector(fd);
+
+ for (p = all ? pipelines : pskip; p->prefix; p++) {
+ igt_subtest_group {
+ igt_fixture p->require();
+
+ igt_subtest_f("%s-%s-%s-sanitycheck0%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ buffers_create(&buffers);
+ run_wrap_func(&buffers, do_basic0,
+ p->copy, h->hang);
+ }
+
+ igt_subtest_f("%s-%s-%s-sanitycheck1%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ buffers_create(&buffers);
+ run_wrap_func(&buffers, do_basic1,
+ p->copy, h->hang);
+ }
+
+ igt_subtest_f("%s-%s-%s-sanitycheckN%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ buffers_create(&buffers);
+ run_wrap_func(&buffers, do_basicN,
+ p->copy, h->hang);
+ }
+
+ /* try to overwrite the source values */
+ igt_subtest_f("%s-%s-%s-overwrite-source-one%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ buffers_create(&buffers);
+ run_wrap_func(&buffers,
+ do_overwrite_source__one,
+ p->copy, h->hang);
+ }
+
+ igt_subtest_f("%s-%s-%s-overwrite-source%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ buffers_create(&buffers);
+ run_wrap_func(&buffers,
+ do_overwrite_source,
+ p->copy, h->hang);
+ }
+
+ igt_subtest_f("%s-%s-%s-overwrite-source-read-bcs%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ buffers_create(&buffers);
+ run_wrap_func(&buffers,
+ do_overwrite_source_read_bcs,
+ p->copy, h->hang);
+ }
+
+ igt_subtest_f("%s-%s-%s-overwrite-source-read-rcs%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ igt_require(rendercopy);
+ buffers_create(&buffers);
+ run_wrap_func(&buffers,
+ do_overwrite_source_read_rcs,
+ p->copy, h->hang);
+ }
+
+ igt_subtest_f("%s-%s-%s-overwrite-source-rev%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ buffers_create(&buffers);
+ run_wrap_func(&buffers,
+ do_overwrite_source__rev,
+ p->copy, h->hang);
+ }
+
+				/* try to intermix copies with GPU copies */
+ igt_subtest_f("%s-%s-%s-intermix-rcs%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ igt_require(rendercopy);
+ buffers_create(&buffers);
+ run_wrap_func(&buffers,
+ do_intermix_rcs,
+ p->copy, h->hang);
+ }
+ igt_subtest_f("%s-%s-%s-intermix-bcs%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ igt_require(rendercopy);
+ buffers_create(&buffers);
+ run_wrap_func(&buffers,
+ do_intermix_bcs,
+ p->copy, h->hang);
+ }
+ igt_subtest_f("%s-%s-%s-intermix-both%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ igt_require(rendercopy);
+ buffers_create(&buffers);
+ run_wrap_func(&buffers,
+ do_intermix_both,
+ p->copy, h->hang);
+ }
+
+ /* try to read the results before the copy completes */
+ igt_subtest_f("%s-%s-%s-early-read%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ buffers_create(&buffers);
+ run_wrap_func(&buffers,
+ do_early_read,
+ p->copy, h->hang);
+ }
+
+ /* concurrent reads */
+ igt_subtest_f("%s-%s-%s-read-read-bcs%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ buffers_create(&buffers);
+ run_wrap_func(&buffers,
+ do_read_read_bcs,
+ p->copy, h->hang);
+ }
+ igt_subtest_f("%s-%s-%s-read-read-rcs%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ igt_require(rendercopy);
+ buffers_create(&buffers);
+ run_wrap_func(&buffers,
+ do_read_read_rcs,
+ p->copy, h->hang);
+ }
+
+ /* split copying between rings */
+ igt_subtest_f("%s-%s-%s-write-read-bcs%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ buffers_create(&buffers);
+ run_wrap_func(&buffers,
+ do_write_read_bcs,
+ p->copy, h->hang);
+ }
+ igt_subtest_f("%s-%s-%s-write-read-rcs%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ igt_require(rendercopy);
+ buffers_create(&buffers);
+ run_wrap_func(&buffers,
+ do_write_read_rcs,
+ p->copy, h->hang);
+ }
+
+				/* and finally try to trick the kernel into losing the pending write */
+ igt_subtest_f("%s-%s-%s-gpu-read-after-write%s%s", prefix, mode->name, p->prefix, suffix, h->suffix) {
+ buffers_create(&buffers);
+ run_wrap_func(&buffers,
+ do_gpu_read_after_write,
+ p->copy, h->hang);
+ }
+ }
+ }
+
+ if (!*h->suffix)
+ igt_fixture
+ igt_stop_hang_detector();
+ }
+
+ igt_fixture
+ buffers_fini(&buffers);
+}
+
+static void
+run_modes(const char *style,
+ const struct create *create,
+ const struct access_mode *mode,
+ const struct size *size,
+ const int num)
+{
+ const struct wrap {
+ const char *suffix;
+ run_wrap func;
+ } wrappers[] = {
+ { "", run_single },
+ { "-child", run_child },
+ { "-forked", run_forked },
+ { "-interruptible", run_interruptible },
+ { "-bomb", run_bomb },
+ { NULL },
+ };
+
+ while (mode->name) {
+ igt_subtest_group {
+ igt_fixture {
+ if (mode->require)
+ mode->require(create, num);
+ }
+
+ for (const struct wrap *w = wrappers; w->suffix; w++) {
+ run_mode(style, create, mode, size, num,
+ w->suffix, w->func);
+ }
+ }
+
+ mode++;
+ }
+}
+
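+/*
+ * Size the working set so that the src/dst pairs fill roughly max bytes,
+ * then check the system can actually back that many objects and mappings.
+ */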
+static unsigned
+num_buffers(uint64_t max,
+ const struct size *s,
+ const struct create *c,
+ unsigned allow_mem)
+{
+ unsigned size = 4*s->width*s->height;
+ uint64_t n;
+
+ igt_assert(size);
+ n = max / (2*size);
+ n += MIN_BUFFERS;
+
+ igt_require(n < INT32_MAX);
+ igt_require(set_max_map_count(2*n));
+
+ if (c->require)
+ c->require(c, n);
+
+ intel_require_memory(2*n, size, allow_mem);
+
+ return n;
+}
+
+static bool allow_unlimited_files(void)
+{
+ struct rlimit rlim;
+ unsigned nofile_rlim = 1024*1024;
+
+ FILE *file = fopen("/proc/sys/fs/file-max", "r");
+ if (file) {
+ igt_assert(fscanf(file, "%u", &nofile_rlim) == 1);
+ igt_info("System limit for open files is %u\n", nofile_rlim);
+ fclose(file);
+ }
+
+ if (getrlimit(RLIMIT_NOFILE, &rlim))
+ return false;
+
+ rlim.rlim_cur = nofile_rlim;
+ rlim.rlim_max = nofile_rlim;
+ return setrlimit(RLIMIT_NOFILE, &rlim) == 0;
+}
+
+igt_main
+{
+ const struct access_mode modes[] = {
+ {
+ .name = "prw",
+ .create_bo = unmapped_create_bo,
+ .set_bo = prw_set_bo,
+ .cmp_bo = prw_cmp_bo,
+ .release_bo = nop_release_bo,
+ },
+ {
+ .name = "partial",
+ .create_bo = unmapped_create_bo,
+ .set_bo = partial_set_bo,
+ .cmp_bo = partial_cmp_bo,
+ .release_bo = nop_release_bo,
+ },
+ {
+ .name = "cpu",
+ .create_bo = unmapped_create_bo,
+ .require = create_cpu_require,
+ .set_bo = cpu_set_bo,
+ .cmp_bo = cpu_cmp_bo,
+ .release_bo = nop_release_bo,
+ },
+ {
+ .name = "snoop",
+ .create_bo = snoop_create_bo,
+ .require = create_snoop_require,
+ .set_bo = cpu_set_bo,
+ .cmp_bo = cpu_cmp_bo,
+ .release_bo = nop_release_bo,
+ },
+ {
+ .name = "userptr",
+ .create_bo = userptr_create_bo,
+ .require = create_userptr_require,
+ .set_bo = userptr_set_bo,
+ .cmp_bo = userptr_cmp_bo,
+ .release_bo = userptr_release_bo,
+ },
+ {
+ .name = "dmabuf",
+ .create_bo = dmabuf_create_bo,
+ .require = create_dmabuf_require,
+ .set_bo = dmabuf_set_bo,
+ .cmp_bo = dmabuf_cmp_bo,
+ .release_bo = dmabuf_release_bo,
+ },
+ {
+ .name = "vgem",
+ .create_bo = vgem_create_bo,
+ .require = create_vgem_require,
+ .set_bo = dmabuf_set_bo,
+ .cmp_bo = dmabuf_cmp_bo,
+ .release_bo = dmabuf_release_bo,
+ },
+ {
+ .name = "gtt",
+ .create_bo = gtt_create_bo,
+ .set_bo = gtt_set_bo,
+ .cmp_bo = gtt_cmp_bo,
+ .release_bo = nop_release_bo,
+ },
+ {
+ .name = "gttX",
+ .create_bo = gttX_create_bo,
+ .set_bo = gtt_set_bo,
+ .cmp_bo = gtt_cmp_bo,
+ .release_bo = nop_release_bo,
+ },
+ {
+ .name = "wc",
+ .require = wc_create_require,
+ .create_bo = wc_create_bo,
+ .set_bo = gtt_set_bo,
+ .cmp_bo = gtt_cmp_bo,
+ .release_bo = wc_release_bo,
+ },
+ {
+ .name = "gpu",
+ .create_bo = gpu_create_bo,
+ .set_bo = gpu_set_bo,
+ .cmp_bo = gpu_cmp_bo,
+ .release_bo = nop_release_bo,
+ },
+ {
+ .name = "gpuX",
+ .create_bo = gpuX_create_bo,
+ .set_bo = gpu_set_bo,
+ .cmp_bo = gpu_cmp_bo,
+ .release_bo = nop_release_bo,
+ },
+ { NULL },
+ };
+ const struct create create[] = {
+ { "", can_create_normal, create_normal_bo},
+#if HAVE_CREATE_PRIVATE
+ { "private-", can_create_private, create_private_bo},
+#endif
+#if HAVE_CREATE_STOLEN
+ { "stolen-", can_create_stolen, create_stolen_bo},
+#endif
+ { NULL, NULL }
+ };
+ const struct size sizes[] = {
+ { "4KiB", 128, 8 },
+		{ "256KiB", 256, 256 },
+ { "1MiB", 512, 512 },
+ { "16MiB", 2048, 2048 },
+ { NULL}
+ };
+ uint64_t pin_sz = 0;
+ void *pinned = NULL;
+ char name[80];
+ int count = 0;
+
+ igt_skip_on_simulation();
+
+ if (strstr(igt_test_name(), "all"))
+ all = true;
+
+ igt_fixture {
+ allow_unlimited_files();
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ intel_detect_and_clear_missed_interrupts(fd);
+ devid = intel_get_drm_devid(fd);
+ gen = intel_gen(devid);
+ rendercopy = igt_get_render_copyfunc(devid);
+
+ vgem_drv = __drm_open_driver(DRIVER_VGEM);
+ }
+
+ for (const struct create *c = create; c->name; c++) {
+ for (const struct size *s = sizes; s->name; s++) {
+ /* Minimum test set */
+ snprintf(name, sizeof(name), "%s%s-%s",
+ c->name, s->name, "tiny");
+ igt_subtest_group {
+ igt_fixture {
+ count = num_buffers(0, s, c, CHECK_RAM);
+ }
+ run_modes(name, c, modes, s, count);
+ }
+
+ /* "Average" test set */
+ snprintf(name, sizeof(name), "%s%s-%s",
+ c->name, s->name, "small");
+ igt_subtest_group {
+ igt_fixture {
+ count = num_buffers(gem_mappable_aperture_size()/4,
+ s, c, CHECK_RAM);
+ }
+ run_modes(name, c, modes, s, count);
+ }
+
+ /* Use the entire mappable aperture */
+ snprintf(name, sizeof(name), "%s%s-%s",
+ c->name, s->name, "thrash");
+ igt_subtest_group {
+ igt_fixture {
+ count = num_buffers(gem_mappable_aperture_size(),
+ s, c, CHECK_RAM);
+ }
+ run_modes(name, c, modes, s, count);
+ }
+
+ /* Use the entire global GTT */
+ snprintf(name, sizeof(name), "%s%s-%s",
+ c->name, s->name, "global");
+ igt_subtest_group {
+ igt_fixture {
+ count = num_buffers(gem_global_aperture_size(fd),
+ s, c, CHECK_RAM);
+ }
+ run_modes(name, c, modes, s, count);
+ }
+
+ /* Use the entire per-process GTT */
+ snprintf(name, sizeof(name), "%s%s-%s",
+ c->name, s->name, "full");
+ igt_subtest_group {
+ igt_fixture {
+ count = num_buffers(gem_aperture_size(fd),
+ s, c, CHECK_RAM);
+ }
+ run_modes(name, c, modes, s, count);
+ }
+
+ snprintf(name, sizeof(name), "%s%s-%s",
+ c->name, s->name, "shrink");
+ igt_subtest_group {
+ igt_fixture {
+ count = num_buffers(gem_mappable_aperture_size(),
+ s, c, CHECK_RAM);
+
+ igt_fork_shrink_helper(fd);
+ }
+ run_modes(name, c, modes, s, count);
+
+ igt_fixture
+ igt_stop_shrink_helper();
+ }
+
+ /* Use the entire mappable aperture, force swapping */
+ snprintf(name, sizeof(name), "%s%s-%s",
+ c->name, s->name, "swap");
+ igt_subtest_group {
+ igt_fixture {
+ if (intel_get_avail_ram_mb() > gem_mappable_aperture_size()/(1024*1024)) {
+ pin_sz = intel_get_avail_ram_mb() - gem_mappable_aperture_size()/(1024*1024);
+
+ igt_debug("Pinning %lld MiB\n", (long long)pin_sz);
+ pin_sz *= 1024 * 1024;
+
+ if (posix_memalign(&pinned, 4096, pin_sz) ||
+ mlock(pinned, pin_sz) ||
+ madvise(pinned, pin_sz, MADV_DONTFORK)) {
+ free(pinned);
+ pinned = NULL;
+ }
+ igt_require(pinned);
+ }
+
+ count = num_buffers(gem_mappable_aperture_size(),
+ s, c, CHECK_RAM | CHECK_SWAP);
+ }
+ run_modes(name, c, modes, s, count);
+
+ igt_fixture {
+ if (pinned) {
+ munlock(pinned, pin_sz);
+ free(pinned);
+ pinned = NULL;
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/tests/i915/gem_concurrent_blit.c b/tests/i915/gem_concurrent_blit.c
new file mode 100644
index 00000000..513de4a1
--- /dev/null
+++ b/tests/i915/gem_concurrent_blit.c
@@ -0,0 +1,8 @@
+/* This test is just a duplicate of gem_concurrent_all. */
+/* However the executable will be gem_concurrent_blit. */
+/* The main function examines argv[0] and, in the case */
+/* of gem_concurrent_blit, runs only a subset of the */
+/* available subtests. This avoids the use of */
+/* non-standard command line parameters which can cause */
+/* problems for automated testing. */
+#include "gem_concurrent_all.c"
diff --git a/tests/i915/gem_cpu_reloc.c b/tests/i915/gem_cpu_reloc.c
new file mode 100644
index 00000000..882c312d
--- /dev/null
+++ b/tests/i915/gem_cpu_reloc.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/*
+ * Testcase: Test the relocations through the CPU domain
+ *
+ * Attempt to stress test performing relocations whilst the batch is in the
+ * CPU domain.
+ *
+ * A freshly allocated buffer starts in the CPU domain, and the pwrite
+ * should also be performed whilst in the CPU domain and so we should
+ * execute the relocations within the CPU domain. If for any reason one of
+ * those steps should land it in the GTT domain, we take the secondary
+ * precaution of filling the mappable portion of the GATT.
+ *
+ * In order to detect whether a relocation fails, we first fill a target
+ * buffer with a sequence of invalid commands that would cause the GPU to
+ * hang immediately, and then attempt to overwrite them with a legal, if
+ * short, batchbuffer using a BLT. Then, when we come to execute the bo, if
+ * the relocations failed and we copied across all zeros or garbage, the
+ * GPU will hang.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");
+
+static uint32_t use_blt;
+
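+/*
+ * Queue a small blit from src to dst using the prebuilt batch; the two
+ * relocations patch in the dst and src addresses at fixed offsets.
+ */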
+static void copy(int fd, uint32_t batch, uint32_t src, uint32_t dst)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_relocation_entry gem_reloc[2];
+ struct drm_i915_gem_exec_object2 gem_exec[3];
+
+ gem_reloc[0].offset = 4 * sizeof(uint32_t);
+ gem_reloc[0].delta = 0;
+ gem_reloc[0].target_handle = dst;
+ gem_reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+ gem_reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+ gem_reloc[0].presumed_offset = -1;
+
+ gem_reloc[1].offset = 7 * sizeof(uint32_t);
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ gem_reloc[1].offset += sizeof(uint32_t);
+ gem_reloc[1].delta = 0;
+ gem_reloc[1].target_handle = src;
+ gem_reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+ gem_reloc[1].write_domain = 0;
+ gem_reloc[1].presumed_offset = -1;
+
+ memset(gem_exec, 0, sizeof(gem_exec));
+ gem_exec[0].handle = src;
+ gem_exec[1].handle = dst;
+ gem_exec[2].handle = batch;
+ gem_exec[2].relocation_count = 2;
+ gem_exec[2].relocs_ptr = to_user_pointer(gem_reloc);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(gem_exec);
+ execbuf.buffer_count = 3;
+ execbuf.batch_len = 4096;
+ execbuf.flags = use_blt;
+
+ gem_execbuf(fd, &execbuf);
+}
+
+static void exec(int fd, uint32_t handle)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 gem_exec;
+
+ memset(&gem_exec, 0, sizeof(gem_exec));
+ gem_exec.handle = handle;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&gem_exec);
+ execbuf.buffer_count = 1;
+ execbuf.batch_len = 4096;
+
+ gem_execbuf(fd, &execbuf);
+}
+
+uint32_t gen6_batch[] = {
+ (XY_SRC_COPY_BLT_CMD | 6 |
+ XY_SRC_COPY_BLT_WRITE_ALPHA |
+ XY_SRC_COPY_BLT_WRITE_RGB),
+ (3 << 24 | /* 32 bits */
+ 0xcc << 16 | /* copy ROP */
+ 4096),
+ 0 << 16 | 0, /* dst x1, y1 */
+ 1 << 16 | 2,
+ 0, /* dst relocation */
+ 0 << 16 | 0, /* src x1, y1 */
+ 4096,
+ 0, /* src relocation */
+ MI_BATCH_BUFFER_END,
+};
+
+uint32_t gen8_batch[] = {
+ (XY_SRC_COPY_BLT_CMD | 8 |
+ XY_SRC_COPY_BLT_WRITE_ALPHA |
+ XY_SRC_COPY_BLT_WRITE_RGB),
+ (3 << 24 | /* 32 bits */
+ 0xcc << 16 | /* copy ROP */
+ 4096),
+ 0 << 16 | 0, /* dst x1, y1 */
+ 1 << 16 | 2,
+ 0, /* dst relocation */
+ 0, /* FIXME */
+ 0 << 16 | 0, /* src x1, y1 */
+ 4096,
+ 0, /* src relocation */
+ 0, /* FIXME */
+ MI_BATCH_BUFFER_END,
+};
+
+uint32_t *batch = gen6_batch;
+uint32_t batch_size = sizeof(gen6_batch);
+
+static void run_test(int fd, int count)
+{
+ const uint32_t hang[] = {-1, -1, -1, -1};
+ const uint32_t end[] = {MI_BATCH_BUFFER_END, 0};
+ uint32_t noop;
+ uint32_t *handles;
+ int i;
+
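+	/* noop briefly holds the devid before becoming the no-op bo handle */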
+ noop = intel_get_drm_devid(fd);
+
+ use_blt = 0;
+ if (intel_gen(noop) >= 6)
+ use_blt = I915_EXEC_BLT;
+
+ if (intel_gen(noop) >= 8) {
+ batch = gen8_batch;
+ batch_size += 2 * 4;
+ }
+
+	handles = malloc(count * sizeof(uint32_t));
+ igt_assert(handles);
+
+ noop = gem_create(fd, 4096);
+ gem_write(fd, noop, 0, end, sizeof(end));
+
+ /* fill the entire gart with batches and run them */
+ for (i = 0; i < count; i++) {
+ uint32_t bad;
+
+ handles[i] = gem_create(fd, 4096);
+ gem_write(fd, handles[i], 0, batch, batch_size);
+
+ bad = gem_create(fd, 4096);
+ gem_write(fd, bad, 0, hang, sizeof(hang));
+ gem_write(fd, bad, 4096-sizeof(end), end, sizeof(end));
+
+ /* launch the newly created batch */
+ copy(fd, handles[i], noop, bad);
+ exec(fd, bad);
+ gem_close(fd, bad);
+
+ igt_progress("gem_cpu_reloc: ", i, 2*count);
+ }
+
+ /* And again in reverse to try and catch the relocation code out */
+ for (i = 0; i < count; i++) {
+ uint32_t bad;
+
+ bad = gem_create(fd, 4096);
+ gem_write(fd, bad, 0, hang, sizeof(hang));
+ gem_write(fd, bad, 4096-sizeof(end), end, sizeof(end));
+
+ /* launch the newly created batch */
+ copy(fd, handles[count-i-1], noop, bad);
+ exec(fd, bad);
+ gem_close(fd, bad);
+
+ igt_progress("gem_cpu_reloc: ", count+i, 3*count);
+ }
+
+ /* Third time lucky? */
+ for (i = 0; i < count; i++) {
+ uint32_t bad;
+
+ bad = gem_create(fd, 4096);
+ gem_write(fd, bad, 0, hang, sizeof(hang));
+ gem_write(fd, bad, 4096-sizeof(end), end, sizeof(end));
+
+ /* launch the newly created batch */
+ gem_set_domain(fd, handles[i],
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ copy(fd, handles[i], noop, bad);
+ exec(fd, bad);
+ gem_close(fd, bad);
+
+ igt_progress("gem_cpu_reloc: ", 2*count+i, 3*count);
+ }
+
+	igt_info("Subtest succeeded, cleaning up - this might take a while.\n");
+ for (i = 0; i < count; i++) {
+ gem_close(fd, handles[i]);
+ }
+ gem_close(fd, noop);
+ free(handles);
+}
+
+igt_main
+{
+ uint64_t aper_size;
+ int fd, count;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ }
+
+ igt_subtest("basic") {
+		run_test(fd, 10);
+ }
+
+
+ igt_subtest("full") {
+ aper_size = gem_mappable_aperture_size();
+ count = aper_size / 4096 * 2;
+
+ /* count + 2 (noop & bad) buffers. A gem object appears to
+ require about 2kb + buffer + kernel overhead */
+ intel_require_memory(2+count, 2048+4096, CHECK_RAM);
+
+		run_test(fd, count);
+ }
+
+ igt_fixture {
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_create.c b/tests/i915/gem_create.c
new file mode 100644
index 00000000..25c5e808
--- /dev/null
+++ b/tests/i915/gem_create.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Ankitprasad Sharma <ankitprasad.r.sharma at intel.com>
+ *
+ */
+
+/** @file gem_create.c
+ *
+ * This is a test for both the extended and the old gem_create ioctl,
+ * which includes allocation of objects from stolen memory and shmem.
+ *
+ * The goal is simply to ensure that the basics work and that invalid
+ * input combinations are rejected.
+ */
+
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <getopt.h>
+
+#include <drm.h>
+
+#include "ioctl_wrappers.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_io.h"
+#include "intel_chipset.h"
+#include "igt_aux.h"
+#include "drmtest.h"
+#include "drm.h"
+#include "i915_drm.h"
+
+IGT_TEST_DESCRIPTION("This is a test for the extended & old gem_create ioctl,"
+ " that includes allocation of object from stolen memory"
+ " and shmem.");
+
+#define CLEAR(s) memset(&s, 0, sizeof(s))
+#define PAGE_SIZE 4096
+
+struct local_i915_gem_create_v2 {
+ uint64_t size;
+ uint32_t handle;
+ uint32_t pad;
+#define I915_CREATE_PLACEMENT_STOLEN (1<<0)
+ uint32_t flags;
+} create;
+
+#define LOCAL_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct local_i915_gem_create_v2)
+
+static void invalid_flag_test(int fd)
+{
+ int ret;
+
+ gem_require_stolen_support(fd);
+
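+ /*
+ * Any flags value other than the stolen placement bit is expected to
+ * be rejected by the ioctl.
+ */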
+ create.handle = 0;
+ create.size = PAGE_SIZE;
+ create.flags = ~I915_CREATE_PLACEMENT_STOLEN;
+ ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_CREATE, &create);
+
+ igt_assert(ret <= 0);
+
+ create.flags = ~0;
+ ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_CREATE, &create);
+
+ igt_assert(ret <= 0);
+}
+
+static void invalid_size_test(int fd)
+{
+ uint32_t handle;
+
+ igt_assert_eq(__gem_create(fd, 0, &handle), -EINVAL);
+}
+
+/*
+ * Create an object with a non-page-aligned size and access it at an offset
+ * that is greater than the requested size but still within the object's
+ * last page. The pwrite here must succeed.
+ */
+static void valid_nonaligned_size(int fd)
+{
+ int handle;
+ char buf[PAGE_SIZE];
+
+ handle = gem_create(fd, PAGE_SIZE / 2);
+
+ gem_write(fd, handle, PAGE_SIZE / 2, buf, PAGE_SIZE / 2);
+
+ gem_close(fd, handle);
+}
+
+/*
+ * Create an object with a non-page-aligned size and access it at an offset
+ * that is greater than the requested size and extends beyond the object's
+ * last page. The pwrite here must fail.
+ */
+static void invalid_nonaligned_size(int fd)
+{
+ int handle;
+ char buf[PAGE_SIZE];
+ struct drm_i915_gem_pwrite gem_pwrite;
+
+ handle = gem_create(fd, PAGE_SIZE / 2);
+
+ CLEAR(gem_pwrite);
+ gem_pwrite.handle = handle;
+ gem_pwrite.offset = PAGE_SIZE / 2;
+ gem_pwrite.size = PAGE_SIZE;
+ gem_pwrite.data_ptr = to_user_pointer(buf);
+ /* This should fail. Hence cannot use gem_write. */
+ igt_assert(drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &gem_pwrite));
+
+ gem_close(fd, handle);
+}
+
+igt_main
+{
+ int fd = -1;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ }
+
+ igt_subtest("stolen-invalid-flag")
+ invalid_flag_test(fd);
+
+ igt_subtest("create-invalid-size")
+ invalid_size_test(fd);
+
+ igt_subtest("create-valid-nonaligned")
+ valid_nonaligned_size(fd);
+
+ igt_subtest("create-invalid-nonaligned")
+ invalid_nonaligned_size(fd);
+}
diff --git a/tests/i915/gem_cs_prefetch.c b/tests/i915/gem_cs_prefetch.c
new file mode 100644
index 00000000..2b865368
--- /dev/null
+++ b/tests/i915/gem_cs_prefetch.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/*
+ * Testcase: Test the CS prefetch behaviour on batches
+ *
+ * Historically the batch prefetcher doesn't check whether it's crossing page
+ * boundaries and likes to throw up when it gets a pagefault in return for its
+ * over-eager behaviour. Check for this.
+ *
+ * This tests for a bug where we failed to plug a scratch pte entry into the
+ * very last gtt pte.
+ */
+#include "igt.h"
+
+IGT_TEST_DESCRIPTION("Test the CS prefetch behaviour on batches.");
+
+#define BATCH_SIZE 4096
+
+struct shadow {
+ uint32_t handle;
+ struct drm_i915_gem_relocation_entry reloc;
+};
+
+static void setup(int fd, int gen, struct shadow *shadow)
+{
+ uint32_t buf[16];
+ int i;
+
+ shadow->handle = gem_create(fd, 4096);
+
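+ /*
+ * Assemble a store-dword that writes MI_BATCH_BUFFER_END into the
+ * final dword of the target batch (the relocation below carries a
+ * delta of BATCH_SIZE - 4), so the executed batch terminates right at
+ * a page boundary and the CS prefetcher runs off the end of it.
+ */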
+ i = 0;
+ buf[i++] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ buf[i++] = BATCH_SIZE - sizeof(uint32_t);
+ buf[i++] = 0;
+ } else if (gen >= 4) {
+ buf[i++] = 0;
+ buf[i++] = BATCH_SIZE - sizeof(uint32_t);
+ } else {
+ buf[i-1]--;
+ buf[i++] = BATCH_SIZE - sizeof(uint32_t);
+ }
+ buf[i++] = MI_BATCH_BUFFER_END;
+ buf[i++] = MI_BATCH_BUFFER_END;
+ gem_write(fd, shadow->handle, 0, buf, sizeof(buf));
+
+ memset(&shadow->reloc, 0, sizeof(shadow->reloc));
+ if (gen >= 8 || gen < 4)
+ shadow->reloc.offset = sizeof(uint32_t);
+ else
+ shadow->reloc.offset = 2*sizeof(uint32_t);
+ shadow->reloc.delta = BATCH_SIZE - sizeof(uint32_t);
+ shadow->reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ shadow->reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+}
+
+static void can_test_ring(unsigned ring)
+{
+ int master = drm_open_driver_master(DRIVER_INTEL);
+ int fd = drm_open_driver(DRIVER_INTEL);
+
+ /* Dance to avoid dying with master open */
+ close(master);
+ igt_require_gem(fd);
+ gem_require_ring(fd, ring);
+ igt_require(gem_can_store_dword(fd, ring));
+ close(fd);
+}
+
+static void test_ring(unsigned ring)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct shadow shadow;
+ uint64_t i, count;
+ int fd, gen;
+
+ can_test_ring(ring);
+
+ fd = drm_open_driver_master(DRIVER_INTEL);
+ gen = intel_gen(intel_get_drm_devid(fd));
+ setup(fd, gen, &shadow);
+
+ count = gem_aperture_size(fd) / BATCH_SIZE;
+ intel_require_memory(count, BATCH_SIZE, CHECK_RAM);
+ /* Fill the entire gart with batches and run them. */
+ memset(obj, 0, sizeof(obj));
+ obj[1].handle = shadow.handle;
+ obj[1].relocs_ptr = to_user_pointer(&shadow.reloc);
+ obj[1].relocation_count = 1;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.flags = ring;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ for (i = 0; i < count; i++) {
+ /* Create the new batch using the GPU */
+ obj[0].handle = gem_create(fd, BATCH_SIZE);
+ shadow.reloc.target_handle = obj[0].handle;
+ execbuf.buffer_count = 2;
+ gem_execbuf(fd, &execbuf);
+
+ /* ...then execute the new batch */
+ execbuf.buffer_count = 1;
+ gem_execbuf(fd, &execbuf);
+
+ /* ...and leak the handle to consume the GTT */
+ }
+
+ close(fd);
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+
+ igt_skip_on_simulation();
+
+ for (e = intel_execution_engines; e->name; e++)
+ igt_subtest_f("%s", e->name)
+ test_ring(e->exec_id | e->flags);
+}
diff --git a/tests/i915/gem_cs_tlb.c b/tests/i915/gem_cs_tlb.c
new file mode 100644
index 00000000..51e1c4e1
--- /dev/null
+++ b/tests/i915/gem_cs_tlb.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright © 2011,2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/*
+ * Testcase: Check whether we correctly invalidate the cs tlb
+ *
+ * Motivated by a strange bug on Launchpad where *acth != ipehr, notably on snb
+ * where everything should be coherent by default.
+ *
+ * https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1063252
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+IGT_TEST_DESCRIPTION("Check whether we correctly invalidate the cs tlb.");
+
+#define LOCAL_I915_EXEC_VEBOX (4<<0)
+#define EXEC_OBJECT_PINNED (1<<4)
+#define BATCH_SIZE (1024*1024)
+
+static bool has_softpin(int fd)
+{
+ struct drm_i915_getparam gp;
+ int val = 0;
+
+ memset(&gp, 0, sizeof(gp));
+ gp.param = 37; /* I915_PARAM_HAS_EXEC_SOFTPIN */
+ gp.value = &val;
+
+ if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
+ return 0;
+
+ errno = 0;
+ return (val == 1);
+}
+
+static void *
+mmap_coherent(int fd, uint32_t handle, int size)
+{
+ int domain;
+ void *ptr;
+
+ if (gem_has_llc(fd) || !gem_mmap__has_wc(fd)) {
+ domain = I915_GEM_DOMAIN_CPU;
+ ptr = gem_mmap__cpu(fd, handle, 0, size, PROT_WRITE);
+ } else {
+ domain = I915_GEM_DOMAIN_WC;
+ ptr = gem_mmap__wc(fd, handle, 0, size, PROT_WRITE);
+ }
+
+ gem_set_domain(fd, handle, domain, domain);
+ return ptr;
+}
+
+static void run_on_ring(int fd, unsigned ring_id, const char *ring_name)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 execobj;
+ struct {
+ uint32_t handle;
+ uint32_t *batch;
+ } obj[2];
+ unsigned i;
+ char buf[100];
+
+ gem_require_ring(fd, ring_id);
+ igt_require(has_softpin(fd));
+
+ for (i = 0; i < 2; i++) {
+ obj[i].handle = gem_create(fd, BATCH_SIZE);
+ obj[i].batch = mmap_coherent(fd, obj[i].handle, BATCH_SIZE);
+ memset(obj[i].batch, 0xff, BATCH_SIZE);
+ }
+
+ memset(&execobj, 0, sizeof(execobj));
+ execobj.handle = obj[0].handle;
+ obj[0].batch[0] = MI_BATCH_BUFFER_END;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&execobj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = ring_id;
+
+ /* Execute once to allocate a gtt-offset */
+ gem_execbuf(fd, &execbuf);
+ execobj.flags = EXEC_OBJECT_PINNED;
+
+ sprintf(buf, "Testing %s cs tlb coherency: ", ring_name);
+ for (i = 0; i < BATCH_SIZE/64; i++) {
+ execobj.handle = obj[i&1].handle;
+ obj[i&1].batch[i*64/4] = MI_BATCH_BUFFER_END;
+ execbuf.batch_start_offset = i*64;
+
+ gem_execbuf(fd, &execbuf);
+ }
+
+ for (i = 0; i < 2; i++) {
+ gem_close(fd, obj[i].handle);
+ munmap(obj[i].batch, BATCH_SIZE);
+ }
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ int fd = -1;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ }
+
+ for (e = intel_execution_engines; e->name; e++)
+ igt_subtest_f("%s%s", e->exec_id ? "" : "basic-", e->name)
+ run_on_ring(fd, e->exec_id | e->flags, e->name);
+
+ igt_fixture
+ close(fd);
+}
diff --git a/tests/i915/gem_ctx_bad_destroy.c b/tests/i915/gem_ctx_bad_destroy.c
new file mode 100644
index 00000000..50bb9aa0
--- /dev/null
+++ b/tests/i915/gem_ctx_bad_destroy.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+/*
+ * Negative test cases for destroying contexts
+ */
+
+#include "igt.h"
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+
+IGT_TEST_DESCRIPTION("Negative test cases for destroy contexts.");
+
+uint32_t ctx_id;
+int fd;
+
+igt_main
+{
+ igt_fixture {
+ fd = drm_open_driver_render(DRIVER_INTEL);
+
+ gem_require_contexts(fd);
+
+ ctx_id = gem_context_create(fd);
+ /* Make sure a proper destroy works first */
+ gem_context_destroy(fd, ctx_id);
+ }
+
+ /* try double destroy */
+ igt_subtest("double-destroy") {
+ ctx_id = gem_context_create(fd);
+ gem_context_destroy(fd, ctx_id);
+ igt_assert(__gem_context_destroy(fd, ctx_id) == -ENOENT);
+ }
+
+ igt_subtest("invalid-ctx")
+ igt_assert(__gem_context_destroy(fd, 2) == -ENOENT);
+
+ igt_subtest("invalid-default-ctx")
+ igt_assert(__gem_context_destroy(fd, 0) == -ENOENT);
+
+ igt_subtest("invalid-pad") {
+ struct drm_i915_gem_context_destroy destroy;
+
+ ctx_id = gem_context_create(fd);
+
+ memset(&destroy, 0, sizeof(destroy));
+ destroy.ctx_id = ctx_id;
+ destroy.pad = 1;
+
+ igt_assert(drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy) < 0 &&
+ errno == EINVAL);
+ gem_context_destroy(fd, ctx_id);
+ }
+
+ igt_fixture
+ close(fd);
+}
diff --git a/tests/i915/gem_ctx_create.c b/tests/i915/gem_ctx_create.c
new file mode 100644
index 00000000..a664070d
--- /dev/null
+++ b/tests/i915/gem_ctx_create.c
@@ -0,0 +1,388 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+#include "igt.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <time.h>
+
+#include "igt_rand.h"
+
+#define LOCAL_I915_EXEC_BSD_SHIFT (13)
+#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define ENGINE_FLAGS (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+static unsigned all_engines[16];
+static unsigned all_nengine;
+
+static unsigned ppgtt_engines[16];
+static unsigned ppgtt_nengine;
+
+static int __gem_context_create_local(int fd, struct drm_i915_gem_context_create *arg)
+{
+ int ret = 0;
+ if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg))
+ ret = -errno;
+ return ret;
+}
+
+static double elapsed(const struct timespec *start,
+ const struct timespec *end)
+{
+ return (end->tv_sec - start->tv_sec) + 1e-9*(end->tv_nsec - start->tv_nsec);
+}
+
+static void files(int core, int timeout, const int ncpus)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ uint32_t batch, name;
+
+ batch = gem_create(core, 4096);
+ gem_write(core, batch, 0, &bbe, sizeof(bbe));
+ name = gem_flink(core, batch);
+
+ memset(&obj, 0, sizeof(obj));
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+
+ igt_fork(child, ncpus) {
+ struct timespec start, end;
+ unsigned count = 0;
+
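+ /*
+ * Each iteration opens a fresh file (and hence a fresh default
+ * context), imports the shared batch by its flink name and executes
+ * it, measuring the per-file creation plus execution cost.
+ */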
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ do {
+ do {
+ int fd = drm_open_driver(DRIVER_INTEL);
+ obj.handle = gem_open(fd, name);
+ execbuf.flags &= ~ENGINE_FLAGS;
+ execbuf.flags |= ppgtt_engines[count % ppgtt_nengine];
+ gem_execbuf(fd, &execbuf);
+ close(fd);
+ } while (++count & 1023);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ } while (elapsed(&start, &end) < timeout);
+
+ gem_sync(core, batch);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ igt_info("[%d] File creation + execution: %.3f us\n",
+ child, elapsed(&start, &end) / count * 1e6);
+ }
+ igt_waitchildren();
+
+ gem_close(core, batch);
+}
+
+static void active(int fd, unsigned engine, int timeout, int ncpus)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ unsigned int nengine, engines[16];
+ unsigned *shared;
+
+ if (engine == ALL_ENGINES) {
+ igt_require(all_nengine);
+ nengine = all_nengine;
+ memcpy(engines, all_engines, sizeof(engines[0])*nengine);
+ } else {
+ gem_require_ring(fd, engine);
+ nengine = 1;
+ engines[0] = engine;
+ }
+
+ shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(shared != MAP_FAILED);
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+
+ if (ncpus < 0) {
+ igt_fork(child, ppgtt_nengine) {
+ unsigned long count = 0;
+
+ if (ppgtt_engines[child] == engine)
+ continue;
+
+ execbuf.flags = ppgtt_engines[child];
+
+ while (!*(volatile unsigned *)shared) {
+ obj.handle = gem_create(fd, 4096 << 10);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, obj.handle);
+ count++;
+ }
+
+ igt_debug("hog[%d]: cycles=%lu\n", child, count);
+ }
+ ncpus = -ncpus;
+ }
+
+ igt_fork(child, ncpus) {
+ struct timespec start, end;
+ unsigned count = 0;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ do {
+ do {
+ execbuf.rsvd1 = gem_context_create(fd);
+ for (unsigned n = 0; n < nengine; n++) {
+ execbuf.flags = engines[n];
+ gem_execbuf(fd, &execbuf);
+ }
+ gem_context_destroy(fd, execbuf.rsvd1);
+ } while (++count & 1023);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ } while (elapsed(&start, &end) < timeout);
+
+ gem_sync(fd, obj.handle);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ igt_info("[%d] Context creation + execution: %.3f us\n",
+ child, elapsed(&start, &end) / count * 1e6);
+
+ shared[0] = 1;
+ }
+ igt_waitchildren();
+
+ gem_close(fd, obj.handle);
+ munmap(shared, 4096);
+}
+
+static void xchg_u32(void *array, unsigned i, unsigned j)
+{
+ uint32_t *a = array, tmp;
+
+ tmp = a[i];
+ a[i] = a[j];
+ a[j] = tmp;
+}
+
+static unsigned __context_size(int fd)
+{
+ switch (intel_gen(intel_get_drm_devid(fd))) {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7: return 17 << 12;
+ case 8: return 20 << 12;
+ case 9: return 22 << 12;
+ default: return 32 << 12;
+ }
+}
+
+static unsigned context_size(int fd)
+{
+ uint64_t size;
+
+ size = __context_size(fd);
+ if (ppgtt_nengine > 1) {
+ size += 4 << 12; /* ringbuffer as well */
+ size *= ppgtt_nengine;
+ }
+
+ return size;
+}
+
+static uint64_t total_avail_mem(unsigned mode)
+{
+ uint64_t total = intel_get_avail_ram_mb();
+ if (mode & CHECK_SWAP)
+ total += intel_get_total_swap_mb();
+ return total << 20;
+}
+
+static void maximum(int fd, int ncpus, unsigned mode)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[2];
+ uint64_t avail_mem = total_avail_mem(mode);
+ unsigned ctx_size = context_size(fd);
+ uint32_t *contexts = NULL;
+ unsigned long count = 0;
+ uint32_t ctx_id;
+
+ do {
+ int err;
+
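+ /* count is a power of two (or zero) here, so double the array. */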
+ if ((count & -count) == count) {
+ int sz = count ? 2*count : 1;
+ contexts = realloc(contexts,
+ sz*sizeof(*contexts));
+ igt_assert(contexts);
+ }
+
+ err = -ENOMEM;
+ if (avail_mem > (count + 1) * ctx_size)
+ err = __gem_context_create(fd, &ctx_id);
+ if (err) {
+ igt_info("Created %lu contexts, before failing with '%s' [%d]\n",
+ count, strerror(-err), -err);
+ break;
+ }
+
+ contexts[count++] = ctx_id;
+ } while (1);
+ igt_require(count);
+
+ memset(obj, 0, sizeof(obj));
+ obj[1].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+
+ igt_fork(child, ncpus) {
+ struct timespec start, end;
+
+ hars_petruska_f54_1_random_perturb(child);
+ obj[0].handle = gem_create(fd, 4096);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ for (int repeat = 0; repeat < 3; repeat++) {
+ igt_permute_array(contexts, count, xchg_u32);
+ igt_permute_array(all_engines, all_nengine, xchg_u32);
+
+ for (unsigned long i = 0; i < count; i++) {
+ execbuf.rsvd1 = contexts[i];
+ for (unsigned long j = 0; j < all_nengine; j++) {
+ execbuf.flags = all_engines[j];
+ gem_execbuf(fd, &execbuf);
+ }
+ }
+ }
+ gem_sync(fd, obj[0].handle);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ gem_close(fd, obj[0].handle);
+
+ igt_info("[%d] Context execution: %.3f us\n", child,
+ elapsed(&start, &end) / (3 * count * all_nengine) * 1e6);
+ }
+ igt_waitchildren();
+
+ gem_close(fd, obj[1].handle);
+
+ for (unsigned long i = 0; i < count; i++)
+ gem_context_destroy(fd, contexts[i]);
+ free(contexts);
+}
+
+igt_main
+{
+ const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ struct drm_i915_gem_context_create create;
+ int fd = -1;
+
+ igt_fixture {
+ unsigned engine;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ gem_require_contexts(fd);
+
+ for_each_physical_engine(fd, engine)
+ all_engines[all_nengine++] = engine;
+ igt_require(all_nengine);
+
+ if (gem_uses_full_ppgtt(fd)) {
+ ppgtt_nengine = all_nengine;
+ memcpy(ppgtt_engines,
+ all_engines,
+ all_nengine * sizeof(all_engines[0]));
+ } else
+ ppgtt_engines[ppgtt_nengine++] = 0;
+
+ igt_fork_hang_detector(fd);
+ }
+
+ igt_subtest("basic") {
+ memset(&create, 0, sizeof(create));
+ create.ctx_id = rand();
+ create.pad = 0;
+ igt_assert_eq(__gem_context_create_local(fd, &create), 0);
+ igt_assert(create.ctx_id != 0);
+ gem_context_destroy(fd, create.ctx_id);
+ }
+
+ igt_subtest("invalid-pad") {
+ memset(&create, 0, sizeof(create));
+ create.ctx_id = rand();
+ create.pad = 1;
+ igt_assert_eq(__gem_context_create_local(fd, &create), -EINVAL);
+ }
+
+ igt_subtest("maximum-mem")
+ maximum(fd, ncpus, CHECK_RAM);
+ igt_subtest("maximum-swap")
+ maximum(fd, ncpus, CHECK_RAM | CHECK_SWAP);
+
+ igt_subtest("basic-files")
+ files(fd, 5, 1);
+ igt_subtest("files")
+ files(fd, 150, 1);
+ igt_subtest("forked-files")
+ files(fd, 150, ncpus);
+
+ igt_subtest("active-all")
+ active(fd, ALL_ENGINES, 120, 1);
+ igt_subtest("forked-active-all")
+ active(fd, ALL_ENGINES, 120, ncpus);
+
+ for (const struct intel_execution_engine *e = intel_execution_engines;
+ e->name; e++) {
+ igt_subtest_f("active-%s", e->name)
+ active(fd, e->exec_id | e->flags, 20, 1);
+ igt_subtest_f("forked-active-%s", e->name)
+ active(fd, e->exec_id | e->flags, 20, ncpus);
+ if (e->exec_id) {
+ igt_subtest_f("hog-%s", e->name)
+ active(fd, e->exec_id | e->flags, 20, -1);
+ }
+ }
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_ctx_exec.c b/tests/i915/gem_ctx_exec.c
new file mode 100644
index 00000000..908b59af
--- /dev/null
+++ b/tests/i915/gem_ctx_exec.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+#include "igt.h"
+#include <limits.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+
+IGT_TEST_DESCRIPTION("Test context batch buffer execution.");
+
+/* Copied from gem_exec_nop.c */
+static int exec(int fd, uint32_t handle, int ring, int ctx_id)
+{
+ struct drm_i915_gem_exec_object2 obj = { .handle = handle };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .flags = ring,
+ };
+
+ i915_execbuffer2_set_context_id(execbuf, ctx_id);
+
+ return __gem_execbuf(fd, &execbuf);
+}
+
+static void big_exec(int fd, uint32_t handle, int ring)
+{
+ int num_buffers = gem_global_aperture_size(fd) / 4096;
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffer_count = num_buffers,
+ .flags = ring,
+ };
+ struct drm_i915_gem_exec_object2 *gem_exec;
+ uint32_t ctx_id1, ctx_id2;
+ int i;
+
+ /* Make sure we only fill half of RAM with gem objects. */
+ igt_require(intel_get_total_ram_mb() * 1024 / 2 > num_buffers * 4);
+
+ gem_exec = calloc(num_buffers + 1, sizeof(*gem_exec));
+ igt_assert(gem_exec);
+ memset(gem_exec, 0, (num_buffers + 1) * sizeof(*gem_exec));
+
+ ctx_id1 = gem_context_create(fd);
+ ctx_id2 = gem_context_create(fd);
+
+ gem_exec[0].handle = handle;
+
+ execbuf.buffers_ptr = to_user_pointer(gem_exec);
+
+ execbuf.buffer_count = 1;
+ i915_execbuffer2_set_context_id(execbuf, ctx_id1);
+ gem_execbuf(fd, &execbuf);
+
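+ /*
+ * Fill the array with fresh 4KiB objects, keeping the real batch as
+ * the last entry; when execbuf fails below, one filler is dropped and
+ * the batch slides down a slot, so it is never lost.
+ */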
+ for (i = 0; i < num_buffers; i++)
+ gem_exec[i].handle = gem_create(fd, 4096);
+ gem_exec[i].handle = handle;
+ execbuf.buffer_count = i + 1;
+
+ /* figure out how many buffers we can exactly fit */
+ while (__gem_execbuf(fd, &execbuf) != 0) {
+ i--;
+ gem_close(fd, gem_exec[i].handle);
+ gem_exec[i].handle = handle;
+ execbuf.buffer_count--;
+ igt_info("trying buffer count %i\n", i - 1);
+ }
+
+ igt_info("reduced buffer count to %i from %i\n", i - 1, num_buffers);
+
+ /* double check that it works */
+ gem_execbuf(fd, &execbuf);
+
+ i915_execbuffer2_set_context_id(execbuf, ctx_id2);
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, handle);
+}
+
+static void invalid_context(int fd, unsigned ring, uint32_t handle)
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = handle,
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .flags = ring,
+ };
+ unsigned int i;
+ uint32_t ctx;
+
+ /* Verify everything works. */
+ i915_execbuffer2_set_context_id(execbuf, 0);
+ gem_execbuf(fd, &execbuf);
+
+ ctx = gem_context_create(fd);
+ i915_execbuffer2_set_context_id(execbuf, ctx);
+ gem_execbuf(fd, &execbuf);
+
+ gem_context_destroy(fd, ctx);
+
+ /* Go through the non-existent context id's. */
+ for (i = 0; i < 32; i++) {
+ i915_execbuffer2_set_context_id(execbuf, 1UL << i);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT);
+ }
+
+ i915_execbuffer2_set_context_id(execbuf, INT_MAX);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT);
+
+ i915_execbuffer2_set_context_id(execbuf, UINT_MAX);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT);
+}
+
+igt_main
+{
+ const uint32_t batch[2] = { 0, MI_BATCH_BUFFER_END };
+ const struct intel_execution_engine *e;
+ uint32_t handle;
+ uint32_t ctx_id;
+ int fd;
+
+ igt_fixture {
+ fd = drm_open_driver_render(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ gem_require_contexts(fd);
+
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, batch, sizeof(batch));
+ }
+
+ igt_subtest("basic") {
+ ctx_id = gem_context_create(fd);
+ igt_assert(exec(fd, handle, 0, ctx_id) == 0);
+ gem_sync(fd, handle);
+ gem_context_destroy(fd, ctx_id);
+
+ ctx_id = gem_context_create(fd);
+ igt_assert(exec(fd, handle, 0, ctx_id) == 0);
+ gem_sync(fd, handle);
+ gem_context_destroy(fd, ctx_id);
+
+ igt_assert(exec(fd, handle, 0, ctx_id) < 0);
+ gem_sync(fd, handle);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_f("basic-invalid-context-%s", e->name) {
+ gem_require_ring(fd, e->exec_id | e->flags);
+ invalid_context(fd, e->exec_id | e->flags, handle);
+ }
+ }
+
+ igt_subtest("eviction")
+ big_exec(fd, handle, 0);
+
+ igt_subtest("reset-pin-leak") {
+ int i;
+
+ igt_skip_on_simulation();
+
+ /*
+ * Use an explicit context to isolate the test from
+ * any major code changes related to the per-file
+ * default context (eg. if they would be eliminated).
+ */
+ ctx_id = gem_context_create(fd);
+
+ /*
+ * Iterate enough times that the kernel will
+ * become unhappy if the ggtt pin count for
+ * the last context is leaked at every reset.
+ */
+ for (i = 0; i < 20; i++) {
+ igt_hang_t hang = igt_hang_ring(fd, 0);
+
+ igt_assert_eq(exec(fd, handle, 0, 0), 0);
+ igt_assert_eq(exec(fd, handle, 0, ctx_id), 0);
+ igt_post_hang_ring(fd, hang);
+ }
+
+ gem_context_destroy(fd, ctx_id);
+ }
+}
diff --git a/tests/i915/gem_ctx_isolation.c b/tests/i915/gem_ctx_isolation.c
new file mode 100644
index 00000000..058cf3ec
--- /dev/null
+++ b/tests/i915/gem_ctx_isolation.c
@@ -0,0 +1,743 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+#include "igt_dummyload.h"
+
+#define MAX_REG 0x40000
+#define NUM_REGS (MAX_REG / sizeof(uint32_t))
+
+#define PAGE_ALIGN(x) ALIGN(x, 4096)
+
+#define DIRTY1 0x1
+#define DIRTY2 0x2
+#define RESET 0x4
+
+#define BIT(x) (1ul << (x))
+#define ENGINE(x, y) BIT(4*(x) + (y))
+
+enum {
+ RCS0 = ENGINE(I915_ENGINE_CLASS_RENDER, 0),
+ BCS0 = ENGINE(I915_ENGINE_CLASS_COPY, 0),
+ VCS0 = ENGINE(I915_ENGINE_CLASS_VIDEO, 0),
+ VCS1 = ENGINE(I915_ENGINE_CLASS_VIDEO, 1),
+ VECS0 = ENGINE(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
+};
+
+#define ALL ~0u
+#define GEN_RANGE(x, y) ((ALL >> (32 - (y - x + 1))) << x)
+#define GEN4 (ALL << 4)
+#define GEN5 (ALL << 5)
+#define GEN6 (ALL << 6)
+#define GEN7 (ALL << 7)
+#define GEN8 (ALL << 8)
+#define GEN9 (ALL << 9)
+
+#define NOCTX 0
+
+#define LAST_KNOWN_GEN 10
+
+static const struct named_register {
+ const char *name;
+ unsigned int gen_mask;
+ unsigned int engine_mask;
+ uint32_t offset;
+ uint32_t count;
+ uint32_t ignore_bits;
+ bool masked;
+} nonpriv_registers[] = {
+ { "NOPID", NOCTX, RCS0, 0x2094 },
+ { "MI_PREDICATE_RESULT_2", NOCTX, RCS0, 0x23bc },
+ { "INSTPM", GEN6, RCS0, 0x20c0, 1, BIT(8) /* ro counter */, true },
+ { "IA_VERTICES_COUNT", GEN4, RCS0, 0x2310, 2 },
+ { "IA_PRIMITIVES_COUNT", GEN4, RCS0, 0x2318, 2 },
+ { "VS_INVOCATION_COUNT", GEN4, RCS0, 0x2320, 2 },
+ { "HS_INVOCATION_COUNT", GEN4, RCS0, 0x2300, 2 },
+ { "DS_INVOCATION_COUNT", GEN4, RCS0, 0x2308, 2 },
+ { "GS_INVOCATION_COUNT", GEN4, RCS0, 0x2328, 2 },
+ { "GS_PRIMITIVES_COUNT", GEN4, RCS0, 0x2330, 2 },
+ { "CL_INVOCATION_COUNT", GEN4, RCS0, 0x2338, 2 },
+ { "CL_PRIMITIVES_COUNT", GEN4, RCS0, 0x2340, 2 },
+ { "PS_INVOCATION_COUNT_0", GEN4, RCS0, 0x22c8, 2 },
+ { "PS_DEPTH_COUNT_0", GEN4, RCS0, 0x22d8, 2 },
+ { "GPUGPU_DISPATCHDIMX", GEN8, RCS0, 0x2500 },
+ { "GPUGPU_DISPATCHDIMY", GEN8, RCS0, 0x2504 },
+ { "GPUGPU_DISPATCHDIMZ", GEN8, RCS0, 0x2508 },
+ { "MI_PREDICATE_SRC0", GEN8, RCS0, 0x2400, 2 },
+ { "MI_PREDICATE_SRC1", GEN8, RCS0, 0x2408, 2 },
+ { "MI_PREDICATE_DATA", GEN8, RCS0, 0x2410, 2 },
+ { "MI_PRED_RESULT", GEN8, RCS0, 0x2418 },
+ { "3DPRIM_END_OFFSET", GEN6, RCS0, 0x2420 },
+ { "3DPRIM_START_VERTEX", GEN6, RCS0, 0x2430 },
+ { "3DPRIM_VERTEX_COUNT", GEN6, RCS0, 0x2434 },
+ { "3DPRIM_INSTANCE_COUNT", GEN6, RCS0, 0x2438 },
+ { "3DPRIM_START_INSTANCE", GEN6, RCS0, 0x243c },
+ { "3DPRIM_BASE_VERTEX", GEN6, RCS0, 0x2440 },
+ { "GPGPU_THREADS_DISPATCHED", GEN8, RCS0, 0x2290, 2 },
+ { "PS_INVOCATION_COUNT_1", GEN8, RCS0, 0x22f0, 2 },
+ { "PS_DEPTH_COUNT_1", GEN8, RCS0, 0x22f8, 2 },
+ { "BB_OFFSET", GEN8, RCS0, 0x2158 },
+ { "MI_PREDICATE_RESULT_1", GEN8, RCS0, 0x241c },
+ { "CS_GPR", GEN8, RCS0, 0x2600, 32 },
+ { "OA_CTX_CONTROL", GEN8, RCS0, 0x2360 },
+ { "OACTXID", GEN8, RCS0, 0x2364 },
+ { "PS_INVOCATION_COUNT_2", GEN8, RCS0, 0x2448, 2 },
+ { "PS_DEPTH_COUNT_2", GEN8, RCS0, 0x2450, 2 },
+ { "Cache_Mode_0", GEN7, RCS0, 0x7000 },
+ { "Cache_Mode_1", GEN7, RCS0, 0x7004 },
+ { "GT_MODE", GEN8, RCS0, 0x7008 },
+ { "L3_Config", GEN7, RCS0, 0x7034 },
+ { "TD_CTL", GEN8, RCS0, 0xe400 },
+ { "TD_CTL2", GEN8, RCS0, 0xe404 },
+ { "SO_NUM_PRIMS_WRITEN0", GEN6, RCS0, 0x5200, 2 },
+ { "SO_NUM_PRIMS_WRITEN1", GEN6, RCS0, 0x5208, 2 },
+ { "SO_NUM_PRIMS_WRITEN2", GEN6, RCS0, 0x5210, 2 },
+ { "SO_NUM_PRIMS_WRITEN3", GEN6, RCS0, 0x5218, 2 },
+ { "SO_PRIM_STORAGE_NEEDED0", GEN6, RCS0, 0x5240, 2 },
+ { "SO_PRIM_STORAGE_NEEDED1", GEN6, RCS0, 0x5248, 2 },
+ { "SO_PRIM_STORAGE_NEEDED2", GEN6, RCS0, 0x5250, 2 },
+ { "SO_PRIM_STORAGE_NEEDED3", GEN6, RCS0, 0x5258, 2 },
+ { "SO_WRITE_OFFSET0", GEN7, RCS0, 0x5280 },
+ { "SO_WRITE_OFFSET1", GEN7, RCS0, 0x5284 },
+ { "SO_WRITE_OFFSET2", GEN7, RCS0, 0x5288 },
+ { "SO_WRITE_OFFSET3", GEN7, RCS0, 0x528c },
+ { "OA_CONTROL", NOCTX, RCS0, 0x2b00 },
+ { "PERF_CNT_1", NOCTX, RCS0, 0x91b8, 2 },
+ { "PERF_CNT_2", NOCTX, RCS0, 0x91c0, 2 },
+
+ /* Privileged (enabled by w/a + FORCE_TO_NONPRIV) */
+ { "CTX_PREEMPT", NOCTX /* GEN_RANGE(9, 10) */, RCS0, 0x2248 },
+ { "CS_CHICKEN1", GEN_RANGE(9, 10), RCS0, 0x2580 },
+ { "HDC_CHICKEN1", GEN_RANGE(9, 10), RCS0, 0x7304 },
+ { "L3SQREG1", GEN8, RCS0, 0xb010 },
+
+ { "BCS_GPR", GEN9, BCS0, 0x22600, 32 },
+ { "BCS_SWCTRL", GEN8, BCS0, 0x22200 },
+
+ { "VCS0_GPR", GEN9, VCS0, 0x12600, 32 },
+ { "MFC_VDBOX1", NOCTX, VCS0, 0x12800, 64 },
+
+ { "VCS1_GPR", GEN9, VCS1, 0x1c600, 32 },
+ { "MFC_VDBOX2", NOCTX, VCS1, 0x1c800, 64 },
+
+ { "VECS_GPR", GEN9, VECS0, 0x1a600, 32 },
+
+ {}
+}, ignore_registers[] = {
+ { "RCS timestamp", GEN6, ~0u, 0x2358 },
+ { "VCS0 timestamp", GEN7, ~0u, 0x12358 },
+ { "VCS1 timestamp", GEN7, ~0u, 0x1c358 },
+ { "BCS timestamp", GEN7, ~0u, 0x22358 },
+ { "VECS timestamp", GEN8, ~0u, 0x1a358 },
+ {}
+};
+
+static const char *register_name(uint32_t offset, char *buf, size_t len)
+{
+ for (const struct named_register *r = nonpriv_registers; r->name; r++) {
+ unsigned int width = r->count ? 4*r->count : 4;
+ if (offset >= r->offset && offset < r->offset + width) {
+ if (r->count <= 1)
+ return r->name;
+
+ snprintf(buf, len, "%s[%d]",
+ r->name, (offset - r->offset)/4);
+ return buf;
+ }
+ }
+
+ return "unknown";
+}
+
+static const struct named_register *lookup_register(uint32_t offset)
+{
+ for (const struct named_register *r = nonpriv_registers; r->name; r++) {
+ unsigned int width = r->count ? 4*r->count : 4;
+ if (offset >= r->offset && offset < r->offset + width)
+ return r;
+ }
+
+ return NULL;
+}
+
+static bool ignore_register(uint32_t offset)
+{
+ for (const struct named_register *r = ignore_registers; r->name; r++) {
+ unsigned int width = r->count ? 4*r->count : 4;
+ if (offset >= r->offset && offset < r->offset + width)
+ return true;
+ }
+
+ return false;
+}
+
+static uint32_t read_regs(int fd,
+ uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ unsigned int flags)
+{
+ const unsigned int gen = intel_gen(intel_get_drm_devid(fd));
+ const unsigned int gen_bit = 1 << gen;
+ const unsigned int engine_bit = ENGINE(e->class, e->instance);
+ const bool r64b = gen >= 8;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry *reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ unsigned int regs_size, batch_size, n;
+ uint32_t *batch, *b;
+
+ reloc = calloc(NUM_REGS, sizeof(*reloc));
+ igt_assert(reloc);
+
+ regs_size = NUM_REGS * sizeof(uint32_t);
+ regs_size = PAGE_ALIGN(regs_size);
+
+ batch_size = NUM_REGS * 4 * sizeof(uint32_t) + 4;
+ batch_size = PAGE_ALIGN(batch_size);
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = gem_create(fd, regs_size);
+ obj[1].handle = gem_create(fd, batch_size);
+ obj[1].relocs_ptr = to_user_pointer(reloc);
+
+ b = batch = gem_mmap__cpu(fd, obj[1].handle, 0, batch_size, PROT_WRITE);
+ gem_set_domain(fd, obj[1].handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+
+ n = 0;
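+ /*
+ * Emit one MI_STORE_REGISTER_MEM (SRM) per register dword, dumping
+ * each register into the results buffer at a matching offset; the
+ * destination address is patched in via the relocation entry.
+ */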
+ for (const struct named_register *r = nonpriv_registers; r->name; r++) {
+ if (!(r->engine_mask & engine_bit))
+ continue;
+ if (!(r->gen_mask & gen_bit))
+ continue;
+
+ for (unsigned count = r->count ?: 1, offset = r->offset;
+ count--; offset += 4) {
+ *b++ = 0x24 << 23 | (1 + r64b); /* SRM */
+ *b++ = offset;
+ reloc[n].target_handle = obj[0].handle;
+ reloc[n].presumed_offset = 0;
+ reloc[n].offset = (b - batch) * sizeof(*b);
+ reloc[n].delta = offset;
+ reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[n].write_domain = I915_GEM_DOMAIN_RENDER;
+ *b++ = offset;
+ if (r64b)
+ *b++ = 0;
+ n++;
+ }
+ }
+
+ obj[1].relocation_count = n;
+ *b++ = MI_BATCH_BUFFER_END;
+ munmap(batch, batch_size);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags =
+ gem_class_instance_to_eb_flags(fd, e->class, e->instance);
+ execbuf.rsvd1 = ctx;
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, obj[1].handle);
+ free(reloc);
+
+ return obj[0].handle;
+}
+
+static void write_regs(int fd,
+ uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ unsigned int flags,
+ uint32_t value)
+{
+ const unsigned int gen_bit = 1 << intel_gen(intel_get_drm_devid(fd));
+ const unsigned int engine_bit = ENGINE(e->class, e->instance);
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ unsigned int batch_size;
+ uint32_t *batch, *b;
+
+ batch_size = NUM_REGS * 3 * sizeof(uint32_t) + 4;
+ batch_size = PAGE_ALIGN(batch_size);
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, batch_size);
+
+ b = batch = gem_mmap__cpu(fd, obj.handle, 0, batch_size, PROT_WRITE);
+ gem_set_domain(fd, obj.handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
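+ /*
+ * MI_LOAD_REGISTER_IMM (LRI) writes the chosen value into each
+ * register; masked registers need the write-enable bits set in the
+ * upper 16 bits of the payload.
+ */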
+ for (const struct named_register *r = nonpriv_registers; r->name; r++) {
+ if (!(r->engine_mask & engine_bit))
+ continue;
+ if (!(r->gen_mask & gen_bit))
+ continue;
+ for (unsigned count = r->count ?: 1, offset = r->offset;
+ count--; offset += 4) {
+ *b++ = 0x22 << 23 | 1; /* LRI */
+ *b++ = offset;
+ if (r->masked)
+ *b++ = value | 0xffffu << 16;
+ else
+ *b++ = value;
+ }
+ }
+ *b++ = MI_BATCH_BUFFER_END;
+ munmap(batch, batch_size);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags =
+ gem_class_instance_to_eb_flags(fd, e->class, e->instance);
+ execbuf.rsvd1 = ctx;
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, obj.handle);
+}
+
+static void restore_regs(int fd,
+ uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ unsigned int flags,
+ uint32_t regs)
+{
+ const unsigned int gen = intel_gen(intel_get_drm_devid(fd));
+ const unsigned int gen_bit = 1 << gen;
+ const unsigned int engine_bit = ENGINE(e->class, e->instance);
+ const bool r64b = gen >= 8;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_relocation_entry *reloc;
+ unsigned int batch_size, n;
+ uint32_t *batch, *b;
+
+ if (gen < 7) /* no LRM */
+ return;
+
+ reloc = calloc(NUM_REGS, sizeof(*reloc));
+ igt_assert(reloc);
+
+ batch_size = NUM_REGS * 3 * sizeof(uint32_t) + 4;
+ batch_size = PAGE_ALIGN(batch_size);
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = regs;
+ obj[1].handle = gem_create(fd, batch_size);
+ obj[1].relocs_ptr = to_user_pointer(reloc);
+
+ b = batch = gem_mmap__cpu(fd, obj[1].handle, 0, batch_size, PROT_WRITE);
+ gem_set_domain(fd, obj[1].handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+
+ n = 0;
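+ /*
+ * MI_LOAD_REGISTER_MEM (LRM, gen7+) loads each saved dword back from
+ * the regs buffer into its register, undoing any dirtying before the
+ * context idles.
+ */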
+ for (const struct named_register *r = nonpriv_registers; r->name; r++) {
+ if (!(r->engine_mask & engine_bit))
+ continue;
+ if (!(r->gen_mask & gen_bit))
+ continue;
+
+ for (unsigned count = r->count ?: 1, offset = r->offset;
+ count--; offset += 4) {
+ *b++ = 0x29 << 23 | (1 + r64b); /* LRM */
+ *b++ = offset;
+ reloc[n].target_handle = obj[0].handle;
+ reloc[n].presumed_offset = 0;
+ reloc[n].offset = (b - batch) * sizeof(*b);
+ reloc[n].delta = offset;
+ reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[n].write_domain = 0;
+ *b++ = offset;
+ if (r64b)
+ *b++ = 0;
+ n++;
+ }
+ }
+ obj[1].relocation_count = n;
+ *b++ = MI_BATCH_BUFFER_END;
+ munmap(batch, batch_size);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags =
+ gem_class_instance_to_eb_flags(fd, e->class, e->instance);
+ execbuf.rsvd1 = ctx;
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, obj[1].handle);
+}
+
+__attribute__((unused))
+static void dump_regs(int fd,
+ const struct intel_execution_engine2 *e,
+ unsigned int regs)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ const unsigned int gen_bit = 1 << gen;
+ const unsigned int engine_bit = ENGINE(e->class, e->instance);
+ unsigned int regs_size;
+ uint32_t *out;
+
+ regs_size = NUM_REGS * sizeof(uint32_t);
+ regs_size = PAGE_ALIGN(regs_size);
+
+ out = gem_mmap__cpu(fd, regs, 0, regs_size, PROT_READ);
+ gem_set_domain(fd, regs, I915_GEM_DOMAIN_CPU, 0);
+
+ for (const struct named_register *r = nonpriv_registers; r->name; r++) {
+ if (!(r->engine_mask & engine_bit))
+ continue;
+ if (!(r->gen_mask & gen_bit))
+ continue;
+
+ if (r->count <= 1) {
+ igt_debug("0x%04x (%s): 0x%08x\n",
+ r->offset, r->name, out[r->offset/4]);
+ } else {
+ for (unsigned x = 0; x < r->count; x++)
+ igt_debug("0x%04x (%s[%d]): 0x%08x\n",
+ r->offset+4*x, r->name, x,
+ out[r->offset/4 + x]);
+ }
+ }
+ munmap(out, regs_size);
+}
+
+static void compare_regs(int fd, uint32_t A, uint32_t B, const char *who)
+{
+ unsigned int num_errors;
+ unsigned int regs_size;
+ uint32_t *a, *b;
+ char buf[80];
+
+ regs_size = NUM_REGS * sizeof(uint32_t);
+ regs_size = PAGE_ALIGN(regs_size);
+
+ a = gem_mmap__cpu(fd, A, 0, regs_size, PROT_READ);
+ gem_set_domain(fd, A, I915_GEM_DOMAIN_CPU, 0);
+
+ b = gem_mmap__cpu(fd, B, 0, regs_size, PROT_READ);
+ gem_set_domain(fd, B, I915_GEM_DOMAIN_CPU, 0);
+
+ num_errors = 0;
+ for (unsigned int n = 0; n < NUM_REGS; n++) {
+ const struct named_register *r;
+ uint32_t offset = n * sizeof(uint32_t);
+ uint32_t mask;
+
+ if (a[n] == b[n])
+ continue;
+
+ if (ignore_register(offset))
+ continue;
+
+ mask = ~0u;
+ r = lookup_register(offset);
+ if (r && r->masked)
+ mask >>= 16;
+ if (r && r->ignore_bits)
+ mask &= ~r->ignore_bits;
+
+ if ((a[n] & mask) == (b[n] & mask))
+ continue;
+
+ igt_warn("Register 0x%04x (%s): A=%08x B=%08x\n",
+ offset,
+ register_name(offset, buf, sizeof(buf)),
+ a[n] & mask, b[n] & mask);
+ num_errors++;
+ }
+ munmap(b, regs_size);
+ munmap(a, regs_size);
+
+ igt_assert_f(num_errors == 0,
+ "%d registers mistached between %s.\n",
+ num_errors, who);
+}
+
+static void isolation(int fd,
+ const struct intel_execution_engine2 *e,
+ unsigned int flags)
+{
+ static const uint32_t values[] = {
+ 0x0,
+ 0xffffffff,
+ 0xcccccccc,
+ 0x33333333,
+ 0x55555555,
+ 0xaaaaaaaa,
+ 0xdeadbeef
+ };
+ unsigned int engine = gem_class_instance_to_eb_flags(fd,
+ e->class,
+ e->instance);
+ unsigned int num_values =
+ flags & (DIRTY1 | DIRTY2) ? ARRAY_SIZE(values) : 1;
+
+ gem_quiescent_gpu(fd);
+
+ for (int v = 0; v < num_values; v++) {
+ igt_spin_t *spin = NULL;
+ uint32_t ctx[2], regs[2], tmp;
+
+ ctx[0] = gem_context_create(fd);
+ regs[0] = read_regs(fd, ctx[0], e, flags);
+
+ spin = igt_spin_batch_new(fd, .ctx = ctx[0], .engine = engine);
+
+ if (flags & DIRTY1) {
+ igt_debug("%s[%d]: Setting all registers of ctx 0 to 0x%08x\n",
+ __func__, v, values[v]);
+ write_regs(fd, ctx[0], e, flags, values[v]);
+ }
+
+ /*
+ * We create and execute a new context, whilst the HW is
+ * occupied with the previous context (we should switch from
+ * the old to the new proto-context without idling, which could
+ * then load the powercontext). If all goes well, we only see
+ * the default values from this context, but if it goes badly we
+ * see the corruption from the previous context instead!
+ */
+ ctx[1] = gem_context_create(fd);
+ regs[1] = read_regs(fd, ctx[1], e, flags);
+
+ if (flags & DIRTY2) {
+ igt_debug("%s[%d]: Setting all registers of ctx 1 to 0x%08x\n",
+ __func__, v, ~values[v]);
+ write_regs(fd, ctx[1], e, flags, ~values[v]);
+ }
+
+ /*
+ * Restore the original register values before the HW idles.
+ * Or else it may never restart!
+ */
+ tmp = read_regs(fd, ctx[0], e, flags);
+ restore_regs(fd, ctx[0], e, flags, regs[0]);
+
+ igt_spin_batch_free(fd, spin);
+
+ if (!(flags & DIRTY1))
+ compare_regs(fd, regs[0], tmp, "two reads of the same ctx");
+ compare_regs(fd, regs[0], regs[1], "two virgin contexts");
+
+ for (int n = 0; n < ARRAY_SIZE(ctx); n++) {
+ gem_close(fd, regs[n]);
+ gem_context_destroy(fd, ctx[n]);
+ }
+ gem_close(fd, tmp);
+ }
+}
+
+#define NOSLEEP (0 << 8)
+#define S3_DEVICES (1 << 8)
+#define S3 (2 << 8)
+#define S4_DEVICES (3 << 8)
+#define S4 (4 << 8)
+#define SLEEP_MASK (0xf << 8)
+
+static void inject_reset_context(int fd, unsigned int engine)
+{
+ struct igt_spin_factory opts = {
+ .ctx = gem_context_create(fd),
+ .engine = engine,
+ .flags = IGT_SPIN_FAST,
+ };
+ igt_spin_t *spin;
+
+ /*
+ * Force a context switch before triggering the reset, or else
+ * we risk corrupting the target context and we can't blame the
+ * HW for screwing up if the context was already broken.
+ */
+
+ if (gem_can_store_dword(fd, engine))
+ opts.flags |= IGT_SPIN_POLL_RUN;
+
+ spin = __igt_spin_batch_factory(fd, &opts);
+
+ if (spin->running)
+ igt_spin_busywait_until_running(spin);
+ else
+ usleep(1000); /* better than nothing */
+
+ igt_force_gpu_reset(fd);
+
+ igt_spin_batch_free(fd, spin);
+ gem_context_destroy(fd, opts.ctx);
+}
+
+static void preservation(int fd,
+ const struct intel_execution_engine2 *e,
+ unsigned int flags)
+{
+ static const uint32_t values[] = {
+ 0x0,
+ 0xffffffff,
+ 0xcccccccc,
+ 0x33333333,
+ 0x55555555,
+ 0xaaaaaaaa,
+ 0xdeadbeef
+ };
+ const unsigned int num_values = ARRAY_SIZE(values);
+ unsigned int engine =
+ gem_class_instance_to_eb_flags(fd, e->class, e->instance);
+ uint32_t ctx[num_values + 1];
+ uint32_t regs[num_values + 1][2];
+ igt_spin_t *spin;
+
+ gem_quiescent_gpu(fd);
+
+ ctx[num_values] = gem_context_create(fd);
+ spin = igt_spin_batch_new(fd, .ctx = ctx[num_values], .engine = engine);
+ regs[num_values][0] = read_regs(fd, ctx[num_values], e, flags);
+ for (int v = 0; v < num_values; v++) {
+ ctx[v] = gem_context_create(fd);
+ write_regs(fd, ctx[v], e, flags, values[v]);
+
+ regs[v][0] = read_regs(fd, ctx[v], e, flags);
+ }
+ gem_close(fd, read_regs(fd, ctx[num_values], e, flags));
+ igt_spin_batch_free(fd, spin);
+
+ if (flags & RESET)
+ inject_reset_context(fd, engine);
+
+ switch (flags & SLEEP_MASK) {
+ case NOSLEEP:
+ break;
+
+ case S3_DEVICES:
+ igt_system_suspend_autoresume(SUSPEND_STATE_MEM,
+ SUSPEND_TEST_DEVICES);
+ break;
+
+ case S3:
+ igt_system_suspend_autoresume(SUSPEND_STATE_MEM,
+ SUSPEND_TEST_NONE);
+ break;
+
+ case S4_DEVICES:
+ igt_system_suspend_autoresume(SUSPEND_STATE_DISK,
+ SUSPEND_TEST_DEVICES);
+ break;
+
+ case S4:
+ igt_system_suspend_autoresume(SUSPEND_STATE_DISK,
+ SUSPEND_TEST_NONE);
+ break;
+ }
+
+ spin = igt_spin_batch_new(fd, .ctx = ctx[num_values], .engine = engine);
+ for (int v = 0; v < num_values; v++)
+ regs[v][1] = read_regs(fd, ctx[v], e, flags);
+ regs[num_values][1] = read_regs(fd, ctx[num_values], e, flags);
+ igt_spin_batch_free(fd, spin);
+
+ for (int v = 0; v < num_values; v++) {
+ char buf[80];
+
+ snprintf(buf, sizeof(buf), "dirty %x context", values[v]);
+ compare_regs(fd, regs[v][0], regs[v][1], buf);
+
+ gem_close(fd, regs[v][0]);
+ gem_close(fd, regs[v][1]);
+ gem_context_destroy(fd, ctx[v]);
+ }
+ compare_regs(fd, regs[num_values][0], regs[num_values][1], "clean");
+ gem_context_destroy(fd, ctx[num_values]);
+}
+
+static unsigned int __has_context_isolation(int fd)
+{
+ struct drm_i915_getparam gp;
+ int value = 0;
+
+ memset(&gp, 0, sizeof(gp));
+ gp.param = 50; /* I915_PARAM_HAS_CONTEXT_ISOLATION */
+ gp.value = &value;
+
+ igt_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+ errno = 0;
+
+ return value;
+}
+
+igt_main
+{
+ unsigned int has_context_isolation = 0;
+ int fd = -1;
+
+ igt_fixture {
+ int gen;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ igt_require(gem_has_contexts(fd));
+
+ has_context_isolation = __has_context_isolation(fd);
+ igt_require(has_context_isolation);
+
+ gen = intel_gen(intel_get_drm_devid(fd));
+
+ igt_warn_on_f(gen > LAST_KNOWN_GEN,
+ "GEN not recognized! Test needs to be updated to run.");
+ igt_skip_on(gen > LAST_KNOWN_GEN);
+ }
+
+ for (const struct intel_execution_engine2 *e = intel_execution_engines2;
+ e->name; e++) {
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(has_context_isolation & (1 << e->class));
+ gem_require_engine(fd, e->class, e->instance);
+ igt_fork_hang_detector(fd);
+ }
+
+ igt_subtest_f("%s-clean", e->name)
+ isolation(fd, e, 0);
+ igt_subtest_f("%s-dirty-create", e->name)
+ isolation(fd, e, DIRTY1);
+ igt_subtest_f("%s-dirty-switch", e->name)
+ isolation(fd, e, DIRTY2);
+
+ igt_subtest_f("%s-none", e->name)
+ preservation(fd, e, 0);
+ igt_subtest_f("%s-S3", e->name)
+ preservation(fd, e, S3);
+ igt_subtest_f("%s-S4", e->name)
+ preservation(fd, e, S4);
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ }
+
+ igt_subtest_f("%s-reset", e->name) {
+ igt_hang_t hang = igt_allow_hang(fd, 0, 0);
+ preservation(fd, e, RESET);
+ igt_disallow_hang(fd, hang);
+ }
+ }
+ }
+}
diff --git a/tests/i915/gem_ctx_param.c b/tests/i915/gem_ctx_param.c
new file mode 100644
index 00000000..c46fd709
--- /dev/null
+++ b/tests/i915/gem_ctx_param.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+
+#include "igt.h"
+
+IGT_TEST_DESCRIPTION("Basic test for context set/get param input validation.");
+
+#define BIT(x) (1ul << (x))
+
+#define NEW_CTX BIT(0)
+#define USER BIT(1)
+
+static int reopen_driver(int fd)
+{
+ char path[256];
+
+ snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
+ fd = open(path, O_RDWR);
+ igt_assert_lte(0, fd);
+
+ return fd;
+}
+
+static void set_priority(int i915)
+{
+ static const int64_t test_values[] = {
+ /* Test space too big, pick significant values */
+ INT_MIN,
+
+ I915_CONTEXT_MIN_USER_PRIORITY - 1,
+ I915_CONTEXT_MIN_USER_PRIORITY,
+ I915_CONTEXT_MIN_USER_PRIORITY + 1,
+
+ I915_CONTEXT_DEFAULT_PRIORITY - 1,
+ I915_CONTEXT_DEFAULT_PRIORITY,
+ I915_CONTEXT_DEFAULT_PRIORITY + 1,
+
+ I915_CONTEXT_MAX_USER_PRIORITY - 1,
+ I915_CONTEXT_MAX_USER_PRIORITY,
+ I915_CONTEXT_MAX_USER_PRIORITY + 1,
+
+ INT_MAX
+ };
+ unsigned int size;
+ int64_t *values;
+
+ igt_require(getuid() == 0);
+
+ size = ARRAY_SIZE(test_values);
+ values = malloc(sizeof(test_values) * 8);
+ igt_assert(values);
+
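+ /*
+ * Expand each base value into 8 variants: as-is, with high bits set,
+ * randomised and shifted copies, to probe how the 64-bit parameter is
+ * handled around the 32-bit priority range.
+ */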
+ for (unsigned i = 0; i < size; i++) {
+ values[i + 0*size] = test_values[i];
+ values[i + 1*size] = test_values[i] | (uint64_t)1 << 32;
+ values[i + 2*size] = test_values[i] | (uint64_t)rand() << 32;
+ values[i + 3*size] = test_values[i] ^ rand();
+ values[i + 4*size] = rand() % (I915_CONTEXT_MAX_USER_PRIORITY - I915_CONTEXT_MIN_USER_PRIORITY) + I915_CONTEXT_MIN_USER_PRIORITY;
+ values[i + 5*size] = rand();
+ values[i + 6*size] = rand() | (uint64_t)rand() << 32;
+ values[i + 7*size] = (uint64_t)test_values[i] << 32;
+ }
+ size *= 8;
+
+ igt_permute_array(values, size, igt_exchange_int64);
+
+ igt_fork(flags, NEW_CTX | USER) {
+ int fd = reopen_driver(i915);
+ struct drm_i915_gem_context_param arg = {
+ .param = I915_CONTEXT_PARAM_PRIORITY,
+ .ctx_id = flags & NEW_CTX ? gem_context_create(fd) : 0,
+ };
+ int64_t old_prio;
+
+ if (flags & USER) {
+ igt_debug("Dropping root privilege\n");
+ igt_drop_root();
+ }
+
+ gem_context_get_param(fd, &arg);
+ old_prio = arg.value;
+
+ for (unsigned i = 0; i < size; i++) {
+ int64_t prio = values[i];
+ int expected = 0;
+ int err;
+
+ arg.value = prio;
+
+ if (flags & USER &&
+ prio > I915_CONTEXT_DEFAULT_PRIORITY)
+ expected = -EPERM;
+
+ if (prio < I915_CONTEXT_MIN_USER_PRIORITY ||
+ prio > I915_CONTEXT_MAX_USER_PRIORITY)
+ expected = -EINVAL;
+
+ err = __gem_context_set_param(fd, &arg);
+ igt_assert_f(err == expected,
+ "Priority requested %" PRId64 " with flags %x, expected result %d, returned %d\n",
+ prio, flags, expected, err);
+
+ gem_context_get_param(fd, &arg);
+ if (!err)
+ old_prio = prio;
+ igt_assert_eq(arg.value, old_prio);
+ }
+
+ arg.value = 0;
+ gem_context_set_param(fd, &arg);
+
+ if (flags & NEW_CTX)
+ gem_context_destroy(fd, arg.ctx_id);
+ }
+
+ igt_waitchildren();
+ free(values);
+}
+
+igt_main
+{
+ struct drm_i915_gem_context_param arg;
+ int fd;
+ uint32_t ctx;
+
+ memset(&arg, 0, sizeof(arg));
+
+ igt_fixture {
+ fd = drm_open_driver_render(DRIVER_INTEL);
+
+ gem_require_contexts(fd);
+ ctx = gem_context_create(fd);
+ }
+
+ arg.param = I915_CONTEXT_PARAM_BAN_PERIOD;
+
+ /* XXX start to enforce ban period returning -EINVAL when
+ * transition has been done */
+ if (__gem_context_get_param(fd, &arg) == -EINVAL)
+ arg.param = I915_CONTEXT_PARAM_BANNABLE;
+
+ igt_subtest("basic") {
+ arg.ctx_id = ctx;
+ gem_context_get_param(fd, &arg);
+ gem_context_set_param(fd, &arg);
+ }
+
+ igt_subtest("basic-default") {
+ arg.ctx_id = 0;
+ gem_context_get_param(fd, &arg);
+ gem_context_set_param(fd, &arg);
+ }
+
+ igt_subtest("invalid-ctx-get") {
+ arg.ctx_id = 2;
+ igt_assert_eq(__gem_context_get_param(fd, &arg), -ENOENT);
+ }
+
+ igt_subtest("invalid-ctx-set") {
+ arg.ctx_id = ctx;
+ gem_context_get_param(fd, &arg);
+ arg.ctx_id = 2;
+ igt_assert_eq(__gem_context_set_param(fd, &arg), -ENOENT);
+ }
+
+ igt_subtest("invalid-size-get") {
+ arg.ctx_id = ctx;
+ arg.size = 8;
+ gem_context_get_param(fd, &arg);
+ igt_assert(arg.size == 0);
+ }
+
+ igt_subtest("invalid-size-set") {
+ arg.ctx_id = ctx;
+ gem_context_get_param(fd, &arg);
+ arg.size = 8;
+ igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+ arg.size = 0;
+ }
+
+ igt_subtest("non-root-set") {
+ igt_fork(child, 1) {
+ igt_drop_root();
+
+ arg.ctx_id = ctx;
+ gem_context_get_param(fd, &arg);
+ arg.value--;
+ igt_assert_eq(__gem_context_set_param(fd, &arg), -EPERM);
+ }
+
+ igt_waitchildren();
+ }
+
+ igt_subtest("root-set") {
+ arg.ctx_id = ctx;
+ gem_context_get_param(fd, &arg);
+ arg.value--;
+ gem_context_set_param(fd, &arg);
+ }
+
+ arg.param = I915_CONTEXT_PARAM_NO_ZEROMAP;
+
+ igt_subtest("non-root-set-no-zeromap") {
+ igt_fork(child, 1) {
+ igt_drop_root();
+
+ arg.ctx_id = ctx;
+ gem_context_get_param(fd, &arg);
+ arg.value--;
+ gem_context_set_param(fd, &arg);
+ }
+
+ igt_waitchildren();
+ }
+
+ igt_subtest("root-set-no-zeromap-enabled") {
+ arg.ctx_id = ctx;
+ gem_context_get_param(fd, &arg);
+ arg.value = 1;
+ gem_context_set_param(fd, &arg);
+ }
+
+ igt_subtest("root-set-no-zeromap-disabled") {
+ arg.ctx_id = ctx;
+ gem_context_get_param(fd, &arg);
+ arg.value = 0;
+ gem_context_set_param(fd, &arg);
+ }
+
+ arg.param = I915_CONTEXT_PARAM_PRIORITY;
+
+ igt_subtest("set-priority-not-supported") {
+ igt_require(!gem_scheduler_has_ctx_priority(fd));
+
+ arg.ctx_id = ctx;
+ arg.size = 0;
+
+ igt_assert_eq(__gem_context_set_param(fd, &arg), -ENODEV);
+ }
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(gem_scheduler_has_ctx_priority(fd));
+ }
+
+ igt_subtest("get-priority-new-ctx") {
+ struct drm_i915_gem_context_param local_arg = arg;
+ uint32_t local_ctx = gem_context_create(fd);
+
+ local_arg.ctx_id = local_ctx;
+
+ gem_context_get_param(fd, &local_arg);
+ igt_assert_eq(local_arg.value, I915_CONTEXT_DEFAULT_PRIORITY);
+
+ gem_context_destroy(fd, local_ctx);
+ }
+
+ igt_subtest("set-priority-invalid-size") {
+ struct drm_i915_gem_context_param local_arg = arg;
+ local_arg.ctx_id = ctx;
+ local_arg.value = 0;
+ local_arg.size = ~0;
+
+ igt_assert_eq(__gem_context_set_param(fd, &local_arg), -EINVAL);
+ }
+
+ igt_subtest("set-priority-range")
+ set_priority(fd);
+ }
+
+ /* NOTE: This testcase intentionally tests for the next free parameter
+ * to catch ABI extensions. Don't "fix" this testcase without adding all
+ * the tests for the new param first.
+ */
+ arg.param = I915_CONTEXT_PARAM_PRIORITY + 1;
+
+ igt_subtest("invalid-param-get") {
+ arg.ctx_id = ctx;
+ igt_assert_eq(__gem_context_get_param(fd, &arg), -EINVAL);
+ }
+
+ igt_subtest("invalid-param-set") {
+ arg.ctx_id = ctx;
+ igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+ }
+
+ igt_fixture
+ close(fd);
+}
diff --git a/tests/i915/gem_ctx_switch.c b/tests/i915/gem_ctx_switch.c
new file mode 100644
index 00000000..1208cb8d
--- /dev/null
+++ b/tests/i915/gem_ctx_switch.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <time.h>
+#include "drm.h"
+
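+/* Local copies of execbuf flags in case the installed headers predate them */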
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define INTERRUPTIBLE 1
+
+static double elapsed(const struct timespec *start, const struct timespec *end)
+{
+ return ((end->tv_sec - start->tv_sec) +
+ (end->tv_nsec - start->tv_nsec)*1e-9);
+}
+
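+/*
+ * Estimate how many batches we can queue within the timeout by timing a
+ * fixed burst on each engine, then scaling down conservatively.
+ */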
+static int measure_qlen(int fd,
+ struct drm_i915_gem_execbuffer2 *execbuf,
+ unsigned int *engine, unsigned int nengine,
+ int timeout)
+{
+ const struct drm_i915_gem_exec_object2 * const obj =
+ (struct drm_i915_gem_exec_object2 *)(uintptr_t)execbuf->buffers_ptr;
+ int qlen = 64;
+
+ for (unsigned int n = 0; n < nengine; n++) {
+ uint64_t saved = execbuf->flags;
+ struct timespec tv = {};
+
+ execbuf->flags |= engine[n];
+
+ igt_nsec_elapsed(&tv);
+ for (int loop = 0; loop < qlen; loop++)
+ gem_execbuf(fd, execbuf);
+ gem_sync(fd, obj->handle);
+
+ execbuf->flags = saved;
+
+ /*
+ * Be conservative and aim not to overshoot timeout, so scale
+ * down by 8 for hopefully a max of 12.5% error.
+ */
+ qlen = qlen * timeout * 1e9 / igt_nsec_elapsed(&tv) / 8 + 1;
+ }
+
+ return qlen;
+}
+
+static void single(int fd, uint32_t handle,
+ const struct intel_execution_engine *e,
+ unsigned flags,
+ const int ncpus,
+ int timeout)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_relocation_entry reloc;
+ uint32_t contexts[64];
+ struct {
+ double elapsed;
+ unsigned long count;
+ } *shared;
+ int n;
+
+ shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(shared != MAP_FAILED);
+
+ gem_require_ring(fd, e->exec_id | e->flags);
+
+ for (n = 0; n < 64; n++)
+ contexts[n] = gem_context_create(fd);
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = handle;
+
+ if (flags & INTERRUPTIBLE) {
+ /* Be tricksy and force a relocation every batch so that
+ * we don't emit the batch but just do MI_SET_CONTEXT
+ */
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.offset = 1024;
+ reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ obj.relocs_ptr = to_user_pointer(&reloc);
+ obj.relocation_count = 1;
+ }
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.rsvd1 = contexts[0];
+ execbuf.flags = e->exec_id | e->flags;
+ execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+ igt_require(__gem_execbuf(fd, &execbuf) == 0);
+ if (__gem_execbuf(fd, &execbuf)) {
+ execbuf.flags = e->exec_id | e->flags;
+ reloc.target_handle = obj.handle;
+ gem_execbuf(fd, &execbuf);
+ }
+ gem_sync(fd, handle);
+
+ igt_fork(child, ncpus) {
+ struct timespec start, now;
+ unsigned int count = 0;
+
+ /* Warmup to bind all objects into each ctx before we begin */
+ for (int i = 0; i < ARRAY_SIZE(contexts); i++) {
+ execbuf.rsvd1 = contexts[i];
+ gem_execbuf(fd, &execbuf);
+ }
+ gem_sync(fd, handle);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ do {
+ igt_while_interruptible(flags & INTERRUPTIBLE) {
+ for (int loop = 0; loop < 64; loop++) {
+ execbuf.rsvd1 = contexts[loop % 64];
+ reloc.presumed_offset = -1;
+ gem_execbuf(fd, &execbuf);
+ }
+ count += 64;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ } while (elapsed(&start, &now) < timeout);
+ gem_sync(fd, handle);
+ clock_gettime(CLOCK_MONOTONIC, &now);
+
+ igt_info("[%d] %s: %'u cycles: %.3fus%s\n",
+ child, e->name, count, elapsed(&start, &now)*1e6 / count,
+ flags & INTERRUPTIBLE ? " (interruptible)" : "");
+
+ shared[child].elapsed = elapsed(&start, &now);
+ shared[child].count = count;
+ }
+ igt_waitchildren();
+
+ if (ncpus > 1) {
+ unsigned long total = 0;
+ double max = 0;
+
+ for (n = 0; n < ncpus; n++) {
+ total += shared[n].count;
+ if (shared[n].elapsed > max)
+ max = shared[n].elapsed;
+ }
+
+ igt_info("Total %s: %'lu cycles: %.3fus%s\n",
+ e->name, total, max*1e6 / total,
+ flags & INTERRUPTIBLE ? " (interruptible)" : "");
+ }
+
+ for (n = 0; n < 64; n++)
+ gem_context_destroy(fd, contexts[n]);
+
+ munmap(shared, 4096);
+}
+
+static void all(int fd, uint32_t handle, unsigned flags, int timeout)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[2];
+ unsigned int engine[16], e;
+ const char *name[16];
+ uint32_t contexts[65];
+ unsigned int nengine;
+ int n, qlen;
+
+ nengine = 0;
+ for_each_physical_engine(fd, e) {
+ engine[nengine] = e;
+ name[nengine] = e__->name;
+ nengine++;
+ }
+ igt_require(nengine);
+
+ for (n = 0; n < ARRAY_SIZE(contexts); n++)
+ contexts[n] = gem_context_create(fd);
+
+ memset(obj, 0, sizeof(obj));
+ obj[1].handle = handle;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj + 1);
+ execbuf.buffer_count = 1;
+ execbuf.rsvd1 = contexts[0];
+ execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+ igt_require(__gem_execbuf(fd, &execbuf) == 0);
+ gem_sync(fd, handle);
+
+ qlen = measure_qlen(fd, &execbuf, engine, nengine, timeout);
+ igt_info("Using timing depth of %d batches\n", qlen);
+
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+
+ for (int pot = 2; pot <= 64; pot *= 2) {
+ for (int nctx = pot - 1; nctx <= pot + 1; nctx++) {
+ igt_fork(child, nengine) {
+ struct timespec start, now;
+ unsigned int count = 0;
+
+ obj[0].handle = gem_create(fd, 4096);
+ execbuf.flags |= engine[child];
+ for (int loop = 0; loop < ARRAY_SIZE(contexts); loop++) {
+ execbuf.rsvd1 = contexts[loop];
+ gem_execbuf(fd, &execbuf);
+ }
+ gem_sync(fd, obj[0].handle);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ do {
+ for (int loop = 0; loop < qlen; loop++) {
+ execbuf.rsvd1 = contexts[loop % nctx];
+ gem_execbuf(fd, &execbuf);
+ }
+ count += qlen;
+ gem_sync(fd, obj[0].handle);
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ } while (elapsed(&start, &now) < timeout);
+ gem_sync(fd, obj[0].handle);
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ gem_close(fd, obj[0].handle);
+
+ igt_info("[%d:%d] %s: %'u cycles: %.3fus%s\n",
+ nctx, child, name[child], count, elapsed(&start, &now)*1e6 / count,
+ flags & INTERRUPTIBLE ? " (interruptible)" : "");
+ }
+ igt_waitchildren();
+ }
+ }
+
+ for (n = 0; n < ARRAY_SIZE(contexts); n++)
+ gem_context_destroy(fd, contexts[n]);
+}
+
+igt_main
+{
+ const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ const struct intel_execution_engine *e;
+ uint32_t light = 0, heavy;
+ int fd = -1;
+
+ igt_fixture {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ gem_require_contexts(fd);
+
+ light = gem_create(fd, 4096);
+ gem_write(fd, light, 0, &bbe, sizeof(bbe));
+
+ heavy = gem_create(fd, 4096*1024);
+ gem_write(fd, heavy, 4096*1024-sizeof(bbe), &bbe, sizeof(bbe));
+
+ igt_fork_hang_detector(fd);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_f("%s%s", e->exec_id == 0 ? "basic-" : "", e->name)
+ single(fd, light, e, 0, 1, 5);
+
+ igt_skip_on_simulation();
+
+ igt_subtest_f("%s%s-heavy", e->exec_id == 0 ? "basic-" : "", e->name)
+ single(fd, heavy, e, 0, 1, 5);
+ igt_subtest_f("%s-interruptible", e->name)
+ single(fd, light, e, INTERRUPTIBLE, 1, 150);
+ igt_subtest_f("forked-%s", e->name)
+ single(fd, light, e, 0, ncpus, 150);
+ igt_subtest_f("forked-%s-heavy", e->name)
+ single(fd, heavy, e, 0, ncpus, 150);
+ igt_subtest_f("forked-%s-interruptible", e->name)
+ single(fd, light, e, INTERRUPTIBLE, ncpus, 150);
+ }
+
+ igt_subtest("basic-all-light")
+ all(fd, light, 0, 5);
+ igt_subtest("basic-all-heavy")
+ all(fd, heavy, 0, 5);
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ gem_close(fd, heavy);
+ gem_close(fd, light);
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_ctx_thrash.c b/tests/i915/gem_ctx_thrash.c
new file mode 100644
index 00000000..b25f95f1
--- /dev/null
+++ b/tests/i915/gem_ctx_thrash.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_sysfs.h"
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <pthread.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+
+IGT_TEST_DESCRIPTION("Fill the Gobal GTT with context objects and VMs\n");
+
+#define NUM_THREADS (2*sysconf(_SC_NPROCESSORS_ONLN))
+
+static void xchg_int(void *array, unsigned i, unsigned j)
+{
+ int *A = array;
+ igt_swap(A[i], A[j]);
+}
+
+static unsigned context_size(int fd)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+
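+ /* Rough size of the per-context logical state image for each gen */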
+ switch (gen) {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7: return 18 << 12;
+ case 8: return 20 << 12;
+ case 9: return 22 << 12;
+ default: return 32 << 12;
+ }
+}
+
+static unsigned get_num_contexts(int fd, int num_engines)
+{
+ uint64_t ggtt_size;
+ unsigned size;
+ unsigned count;
+
+ /* Compute the number of contexts we can allocate to fill the GGTT */
+ ggtt_size = gem_global_aperture_size(fd);
+
+ size = context_size(fd);
+ if (gem_has_execlists(fd)) {
+ size += 4 << 12; /* ringbuffer as well */
+ if (num_engines) /* one per engine with execlists */
+ size *= num_engines;
+ }
+
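+ /* Oversubscribe the GGTT by ~50% so that binding every context forces eviction */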
+ count = 3 * (ggtt_size / size) / 2;
+ igt_info("Creating %lld contexts (assuming of size %lld%s)\n",
+ (long long)count, (long long)size,
+ gem_has_execlists(fd) ? " with execlists" : "");
+
+ intel_require_memory(count, size, CHECK_RAM | CHECK_SWAP);
+ return count;
+}
+
+static void single(const char *name, bool all_engines)
+{
+ struct drm_i915_gem_exec_object2 *obj;
+ struct drm_i915_gem_relocation_entry *reloc;
+ unsigned int engines[16], num_engines, num_ctx;
+ uint32_t *ctx, *map, scratch, size;
+ int fd, gen;
+#define MAX_LOOP 16
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ gem_require_contexts(fd);
+
+ gen = intel_gen(intel_get_drm_devid(fd));
+
+ num_engines = 0;
+ if (all_engines) {
+ unsigned engine;
+
+ for_each_physical_engine(fd, engine) {
+ if (!gem_can_store_dword(fd, engine))
+ continue;
+
+ engines[num_engines++] = engine;
+ if (num_engines == ARRAY_SIZE(engines))
+ break;
+ }
+ } else {
+ igt_require(gem_can_store_dword(fd, 0));
+ engines[num_engines++] = 0;
+ }
+ igt_require(num_engines);
+
+ num_ctx = get_num_contexts(fd, num_engines);
+
+ size = ALIGN(num_ctx * sizeof(uint32_t), 4096);
+ scratch = gem_create(fd, size);
+ gem_set_caching(fd, scratch, I915_CACHING_CACHED);
+ obj = calloc(num_ctx, 3 * sizeof(*obj));
+ reloc = calloc(num_ctx, 2 * sizeof(*reloc));
+
+ ctx = malloc(num_ctx * sizeof(uint32_t));
+ igt_assert(ctx);
+ for (unsigned n = 0; n < num_ctx; n++) {
+ ctx[n] = gem_context_create(fd);
+
+ obj[3*n + 0].handle = gem_create(fd, 4096);
+ reloc[2*n + 0].target_handle = obj[3*n + 0].handle;
+ reloc[2*n + 0].presumed_offset = 0;
+ reloc[2*n + 0].offset = 4000;
+ reloc[2*n + 0].delta = 0;
+ reloc[2*n + 0].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[2*n + 0].write_domain = I915_GEM_DOMAIN_RENDER;
+
+ obj[3*n + 1].handle = scratch;
+ reloc[2*n + 1].target_handle = scratch;
+ reloc[2*n + 1].presumed_offset = 0;
+ reloc[2*n + 1].offset = sizeof(uint32_t);
+ reloc[2*n + 1].delta = n * sizeof(uint32_t);
+ reloc[2*n + 1].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[2*n + 1].write_domain = 0; /* lies! */
+ if (gen >= 4 && gen < 8)
+ reloc[2*n + 1].offset += sizeof(uint32_t);
+
+ obj[3*n + 2].relocs_ptr = to_user_pointer(&reloc[2*n]);
+ obj[3*n + 2].relocation_count = 2;
+ }
+
+ map = gem_mmap__cpu(fd, scratch, 0, size, PROT_WRITE);
+ for (unsigned int loop = 1; loop <= MAX_LOOP; loop <<= 1) {
+ const unsigned int count = loop * num_ctx;
+ uint32_t *all;
+
+ all = malloc(count * sizeof(uint32_t));
+ for (unsigned int n = 0; n < count; n++)
+ all[n] = ctx[n % num_ctx];
+ igt_permute_array(all, count, xchg_int);
+
+ for (unsigned int n = 0; n < count; n++) {
+ const unsigned int r = n % num_ctx;
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj[3*r]),
+ .buffer_count = 3,
+ .flags = engines[n % num_engines],
+ .rsvd1 = all[n],
+ };
+ uint64_t offset =
+ reloc[2*r + 1].presumed_offset +
+ reloc[2*r + 1].delta;
+ uint32_t handle = gem_create(fd, 4096);
+ uint32_t buf[16];
+ int i;
+
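+ /*
+ * Emit a batch that stores this execbuf's context id into the
+ * context's slot in the scratch bo; the MI_STORE_DWORD_IMM
+ * addressing differs between generations.
+ */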
+ buf[i = 0] = MI_STORE_DWORD_IMM;
+ if (gen >= 8) {
+ buf[++i] = offset;
+ buf[++i] = offset >> 32;
+ } else if (gen >= 4) {
+ if (gen < 6)
+ buf[i] |= 1 << 22;
+ buf[++i] = 0;
+ buf[++i] = offset;
+ } else {
+ buf[i]--;
+ buf[++i] = offset;
+ }
+ buf[++i] = all[n];
+ buf[++i] = MI_BATCH_BUFFER_END;
+ gem_write(fd, handle, 0, buf, sizeof(buf));
+ obj[3*r + 2].handle = handle;
+
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, handle);
+ }
+
+ /*
+ * Note we lied about the write-domain when writing from the
+ * GPU (in order to avoid inter-ring synchronisation), so now
+ * we have to force the synchronisation here.
+ */
+ gem_set_domain(fd, scratch,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ for (unsigned int n = count - num_ctx; n < count; n++)
+ igt_assert_eq(map[n % num_ctx], all[n]);
+ free(all);
+ }
+ munmap(map, size);
+
+ free(ctx);
+ close(fd);
+}
+
+static void processes(void)
+{
+ unsigned engines[16], engine;
+ int num_engines;
+ struct rlimit rlim;
+ unsigned num_ctx;
+ uint32_t name;
+ int fd, *fds;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ num_engines = 0;
+ for_each_physical_engine(fd, engine) {
+ engines[num_engines++] = engine;
+ if (num_engines == ARRAY_SIZE(engines))
+ break;
+ }
+
+ num_ctx = get_num_contexts(fd, num_engines);
+
+ /* tweak rlimits to allow us to create this many files */
+ igt_assert(getrlimit(RLIMIT_NOFILE, &rlim) == 0);
+ if (rlim.rlim_cur < ALIGN(num_ctx + 1024, 1024)) {
+ rlim.rlim_cur = ALIGN(num_ctx + 1024, 1024);
+ if (rlim.rlim_cur > rlim.rlim_max)
+ rlim.rlim_max = rlim.rlim_cur;
+ igt_require(setrlimit(RLIMIT_NOFILE, &rlim) == 0);
+ }
+
+ fds = malloc(num_ctx * sizeof(int));
+ igt_assert(fds);
+ for (unsigned n = 0; n < num_ctx; n++) {
+ fds[n] = drm_open_driver(DRIVER_INTEL);
+ if (fds[n] == -1) {
+ int err = errno;
+ for (unsigned i = n; i--; )
+ close(fds[i]);
+ free(fds);
+ errno = err;
+ igt_assert_f(0, "failed to open drm fd %lld/%lld\n", (long long)n, (long long)num_ctx);
+ }
+ }
+
+ if (1) {
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+ name = gem_create(fd, 4096);
+ gem_write(fd, name, 0, &bbe, sizeof(bbe));
+ name = gem_flink(fd, name);
+ }
+
+ igt_fork(child, NUM_THREADS) {
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+
+ memset(&obj, 0, sizeof(obj));
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+
+ igt_permute_array(fds, num_ctx, xchg_int);
+ for (unsigned n = 0; n < num_ctx; n++) {
+ obj.handle = gem_open(fds[n], name);
+ execbuf.flags = engines[n % num_engines];
+ gem_execbuf(fds[n], &execbuf);
+ gem_close(fds[n], obj.handle);
+ }
+ }
+ igt_waitchildren();
+
+ for (unsigned n = 0; n < num_ctx; n++)
+ close(fds[n]);
+ free(fds);
+ close(fd);
+}
+
+struct thread {
+ int fd;
+ uint32_t *all_ctx;
+ unsigned num_ctx;
+ uint32_t batch;
+};
+
+static void *thread(void *data)
+{
+ struct thread *t = data;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ uint32_t *ctx;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = t->batch;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+
+ ctx = malloc(t->num_ctx * sizeof(uint32_t));
+ igt_assert(ctx);
+ memcpy(ctx, t->all_ctx, t->num_ctx * sizeof(uint32_t));
+
+ igt_until_timeout(150) {
+ igt_permute_array(ctx, t->num_ctx, xchg_int);
+ for (unsigned n = 0; n < t->num_ctx; n++) {
+ execbuf.rsvd1 = ctx[n];
+ gem_execbuf(t->fd, &execbuf);
+ }
+ }
+
+ free(ctx);
+
+ return NULL;
+}
+
+static void threads(void)
+{
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+ pthread_t threads[NUM_THREADS];
+ struct thread data;
+
+ data.fd = drm_open_driver_render(DRIVER_INTEL);
+ igt_require_gem(data.fd);
+
+ gem_require_contexts(data.fd);
+
+ data.num_ctx = get_num_contexts(data.fd, false);
+ data.all_ctx = malloc(data.num_ctx * sizeof(uint32_t));
+ igt_assert(data.all_ctx);
+ for (unsigned n = 0; n < data.num_ctx; n++)
+ data.all_ctx[n] = gem_context_create(data.fd);
+ data.batch = gem_create(data.fd, 4096);
+ gem_write(data.fd, data.batch, 0, &bbe, sizeof(bbe));
+
+ for (int n = 0; n < NUM_THREADS; n++)
+ pthread_create(&threads[n], NULL, thread, &data);
+
+ for (int n = 0; n < NUM_THREADS; n++)
+ pthread_join(threads[n], NULL);
+
+ close(data.fd);
+}
+
+igt_main
+{
+ igt_skip_on_simulation();
+
+ igt_subtest("single")
+ single("single", false);
+ igt_subtest("engines")
+ single("engines", true);
+
+ igt_subtest("processes")
+ processes();
+
+ igt_subtest("threads")
+ threads();
+}
diff --git a/tests/i915/gem_double_irq_loop.c b/tests/i915/gem_double_irq_loop.c
new file mode 100644
index 00000000..bf69f5ee
--- /dev/null
+++ b/tests/i915/gem_double_irq_loop.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch> (based on gem_storedw_*.c)
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+#include "i830_reg.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static drm_intel_bo *target_buffer, *blt_bo;
+
+/*
+ * Testcase: Basic check for missed irqs on blt
+ *
+ * Execs one large and then immediately a tiny batch on the blt ring. Then waits
+ * on the second batch. This hopefully catches races in our irq acknowledgement.
+ */
+
+IGT_TEST_DESCRIPTION("Basic check for missed IRQs on blt ring.");
+
+
+#define MI_COND_BATCH_BUFFER_END (0x36<<23 | 1)
+#define MI_DO_COMPARE (1<<21)
+static void
+dummy_reloc_loop(void)
+{
+ int i;
+
+ for (i = 0; i < 0x800; i++) {
+ BLIT_COPY_BATCH_START(0);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ 4*4096);
+ OUT_BATCH(2048 << 16 | 0);
+ OUT_BATCH((4096) << 16 | (2048));
+ OUT_RELOC_FENCED(blt_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH(4*4096);
+ OUT_RELOC_FENCED(blt_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+ intel_batchbuffer_flush(batch);
+
+ BEGIN_BATCH(4, 1);
+ OUT_BATCH(MI_FLUSH_DW | 1);
+ OUT_BATCH(0); /* reserved */
+ OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(MI_NOOP | (1<<22) | (0xf));
+ ADVANCE_BATCH();
+ intel_batchbuffer_flush(batch);
+
+ /* map to force completion */
+ drm_intel_bo_map(target_buffer, 0);
+ drm_intel_bo_unmap(target_buffer);
+ }
+}
+
+igt_simple_main
+{
+ int fd;
+ int devid;
+
+ igt_skip_on_simulation();
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ devid = intel_get_drm_devid(fd);
+ igt_require_f(HAS_BLT_RING(devid),
+ "not (yet) implemented for pre-snb\n");
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ igt_assert(bufmgr);
+ drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+ igt_assert(batch);
+
+ target_buffer = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
+ igt_assert(target_buffer);
+
+ blt_bo = drm_intel_bo_alloc(bufmgr, "target bo", 4*4096*4096, 4096);
+ igt_assert(blt_bo);
+
+ dummy_reloc_loop();
+
+ drm_intel_bo_unreference(target_buffer);
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+}
diff --git a/tests/i915/gem_eio.c b/tests/i915/gem_eio.c
new file mode 100644
index 00000000..5250a414
--- /dev/null
+++ b/tests/i915/gem_eio.c
@@ -0,0 +1,821 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Testcase: Test that only specific ioctls report a wedged GPU.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <signal.h>
+#include <time.h>
+
+#include <drm.h>
+
+#include "igt.h"
+#include "igt_device.h"
+#include "igt_sysfs.h"
+#include "sw_sync.h"
+
+IGT_TEST_DESCRIPTION("Test that specific ioctls report a wedged GPU (EIO).");
+
+static bool i915_reset_control(bool enable)
+{
+ const char *path = "/sys/module/i915/parameters/reset";
+ int fd, ret;
+
+ igt_debug("%s GPU reset\n", enable ? "Enabling" : "Disabling");
+
+ fd = open(path, O_RDWR);
+ igt_require(fd >= 0);
+
+ ret = write(fd, &"01"[enable], 1) == 1;
+ close(fd);
+
+ return ret;
+}
+
+static void trigger_reset(int fd)
+{
+ struct timespec ts = { };
+
+ igt_nsec_elapsed(&ts);
+
+ igt_force_gpu_reset(fd);
+
+ /* And just check the gpu is indeed running again */
+ igt_debug("Checking that the GPU recovered\n");
+ gem_test_engine(fd, ALL_ENGINES);
+ igt_drop_caches_set(fd, DROP_ACTIVE);
+
+ /* We expect forced reset and health check to be quick. */
+ igt_assert(igt_seconds_elapsed(&ts) < 2);
+}
+
+static void manual_hang(int drm_fd)
+{
+ int dir = igt_debugfs_dir(drm_fd);
+
+ igt_sysfs_set(dir, "i915_wedged", "-1");
+
+ close(dir);
+}
+
+static void wedge_gpu(int fd)
+{
+ /* First idle the GPU then disable GPU resets before injecting a hang */
+ gem_quiescent_gpu(fd);
+
+ igt_require(i915_reset_control(false));
+ manual_hang(fd);
+ igt_assert(i915_reset_control(true));
+}
+
+static int __gem_throttle(int fd)
+{
+ int err = 0;
+ if (drmIoctl(fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL))
+ err = -errno;
+ return err;
+}
+
+static void test_throttle(int fd)
+{
+ wedge_gpu(fd);
+
+ igt_assert_eq(__gem_throttle(fd), -EIO);
+
+ trigger_reset(fd);
+}
+
+static void test_execbuf(int fd)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 exec;
+ uint32_t tmp[] = { MI_BATCH_BUFFER_END };
+
+ memset(&exec, 0, sizeof(exec));
+ memset(&execbuf, 0, sizeof(execbuf));
+
+ exec.handle = gem_create(fd, 4096);
+ gem_write(fd, exec.handle, 0, tmp, sizeof(tmp));
+
+ execbuf.buffers_ptr = to_user_pointer(&exec);
+ execbuf.buffer_count = 1;
+
+ wedge_gpu(fd);
+
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EIO);
+ gem_close(fd, exec.handle);
+
+ trigger_reset(fd);
+}
+
+static int __gem_wait(int fd, uint32_t handle, int64_t timeout)
+{
+ struct drm_i915_gem_wait wait = {
+ .bo_handle = handle,
+ .timeout_ns = timeout,
+ };
+ int err;
+
+ err = 0;
+ if (drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait))
+ err = -errno;
+
+ errno = 0;
+ return err;
+}
+
+static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long flags)
+{
+ struct igt_spin_factory opts = {
+ .ctx = ctx,
+ .engine = flags,
+ .flags = IGT_SPIN_FAST,
+ };
+
+ if (gem_can_store_dword(fd, opts.engine))
+ opts.flags |= IGT_SPIN_POLL_RUN;
+
+ return __igt_spin_batch_factory(fd, &opts);
+}
+
+static void __spin_wait(int fd, igt_spin_t *spin)
+{
+ if (spin->running) {
+ igt_spin_busywait_until_running(spin);
+ } else {
+ igt_debug("__spin_wait - usleep mode\n");
+ usleep(500e3); /* Better than nothing! */
+ }
+}
+
+static igt_spin_t * spin_sync(int fd, uint32_t ctx, unsigned long flags)
+{
+ igt_spin_t *spin = __spin_poll(fd, ctx, flags);
+
+ __spin_wait(fd, spin);
+
+ return spin;
+}
+
+struct hang_ctx {
+ int debugfs;
+ struct timespec delay;
+ struct timespec *ts;
+ timer_t timer;
+};
+
+static void hang_handler(union sigval arg)
+{
+ struct hang_ctx *ctx = arg.sival_ptr;
+
+ igt_debug("hang delay = %.2fus\n",
+ igt_nsec_elapsed(&ctx->delay) / 1000.0);
+
+ igt_nsec_elapsed(ctx->ts);
+ igt_assert(igt_sysfs_set(ctx->debugfs, "i915_wedged", "-1"));
+
+ igt_assert_eq(timer_delete(ctx->timer), 0);
+ close(ctx->debugfs);
+ free(ctx);
+}
+
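+/*
+ * Arm a one-shot timer that wedges the GPU from a handler thread after
+ * roughly @us microseconds, recording the time of the hang in @ts.
+ */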
+static void hang_after(int fd, unsigned int us, struct timespec *ts)
+{
+ struct sigevent sev = {
+ .sigev_notify = SIGEV_THREAD,
+ .sigev_notify_function = hang_handler
+ };
+ struct itimerspec its = {
+ .it_value.tv_sec = us / USEC_PER_SEC,
+ .it_value.tv_nsec = us % USEC_PER_SEC * 1000,
+ };
+ struct hang_ctx *ctx;
+
+ ctx = calloc(1, sizeof(*ctx));
+ igt_assert(ctx);
+
+ ctx->debugfs = igt_debugfs_dir(fd);
+ igt_assert_fd(ctx->debugfs);
+
+ sev.sigev_value.sival_ptr = ctx;
+
+ igt_assert_eq(timer_create(CLOCK_MONOTONIC, &sev, &ctx->timer), 0);
+
+ ctx->ts = ts;
+ igt_nsec_elapsed(&ctx->delay);
+
+ igt_assert_eq(timer_settime(ctx->timer, 0, &its, NULL), 0);
+}
+
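+/*
+ * Wait on @bo while the GPU gets wedged (immediately, or after @wait
+ * microseconds) and check that the wait completes promptly.
+ */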
+static void check_wait(int fd, uint32_t bo, unsigned int wait)
+{
+ struct timespec ts = {};
+ uint64_t elapsed;
+
+ if (wait) {
+ hang_after(fd, wait, &ts);
+ } else {
+ igt_nsec_elapsed(&ts);
+ manual_hang(fd);
+ }
+
+ gem_sync(fd, bo);
+
+ elapsed = igt_nsec_elapsed(&ts);
+ igt_assert_f(elapsed < 250e6,
+ "Wake up following reset+wedge took %.3fms\n",
+ elapsed*1e-6);
+}
+
+static void __test_banned(int fd)
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = gem_create(fd, 4096),
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ };
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ unsigned long count = 0;
+
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ gem_quiescent_gpu(fd);
+ igt_require(i915_reset_control(true));
+
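+ /* Keep hanging the GPU until execbuf reports -EIO, i.e. we have been banned */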
+ igt_until_timeout(5) {
+ igt_spin_t *hang;
+
+ if (__gem_execbuf(fd, &execbuf) == -EIO) {
+ igt_info("Banned after causing %lu hangs\n", count);
+ igt_assert(count > 1);
+ return;
+ }
+
+ /* Trigger a reset, making sure we are detected as guilty */
+ hang = spin_sync(fd, 0, 0);
+ trigger_reset(fd);
+ igt_spin_batch_free(fd, hang);
+
+ count++;
+ }
+
+ igt_assert_f(false,
+ "Ran for 5s, %lu hangs without being banned\n",
+ count);
+}
+
+static void test_banned(int fd)
+{
+ fd = gem_reopen_driver(fd);
+ __test_banned(fd);
+ close(fd);
+}
+
+#define TEST_WEDGE (1)
+
+static void test_wait(int fd, unsigned int flags, unsigned int wait)
+{
+ igt_spin_t *hang;
+
+ fd = gem_reopen_driver(fd);
+ igt_require_gem(fd);
+
+ /*
+ * If the request we wait on completes due to a hang (even for
+ * that request), the user expects the return value to be 0 (success).
+ */
+
+ if (flags & TEST_WEDGE)
+ igt_require(i915_reset_control(false));
+ else
+ igt_require(i915_reset_control(true));
+
+ hang = spin_sync(fd, 0, I915_EXEC_DEFAULT);
+
+ check_wait(fd, hang->handle, wait);
+
+ igt_spin_batch_free(fd, hang);
+
+ igt_require(i915_reset_control(true));
+
+ trigger_reset(fd);
+ close(fd);
+}
+
+static void test_suspend(int fd, int state)
+{
+ fd = gem_reopen_driver(fd);
+ igt_require_gem(fd);
+
+ /* Do a suspend first so that we don't skip inside the test */
+ igt_system_suspend_autoresume(state, SUSPEND_TEST_DEVICES);
+
+ /* Check we can suspend when the driver is already wedged */
+ igt_require(i915_reset_control(false));
+ manual_hang(fd);
+
+ igt_system_suspend_autoresume(state, SUSPEND_TEST_DEVICES);
+
+ igt_require(i915_reset_control(true));
+ trigger_reset(fd);
+ close(fd);
+}
+
+static void test_inflight(int fd, unsigned int wait)
+{
+ int parent_fd = fd;
+ unsigned int engine;
+
+ igt_require_gem(fd);
+ igt_require(gem_has_exec_fence(fd));
+
+ for_each_engine(parent_fd, engine) {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ igt_spin_t *hang;
+ int fence[64]; /* conservative estimate of ring size */
+
+ fd = gem_reopen_driver(parent_fd);
+ igt_require_gem(fd);
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].flags = EXEC_OBJECT_WRITE;
+ obj[1].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+
+ gem_quiescent_gpu(fd);
+ igt_debug("Starting %s on engine '%s'\n", __func__, e__->name);
+ igt_require(i915_reset_control(false));
+
+ hang = spin_sync(fd, 0, engine);
+ obj[0].handle = hang->handle;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags = engine | I915_EXEC_FENCE_OUT;
+
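+ /* Queue a ringful of batches behind the hang; their fences must all signal -EIO once wedged */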
+ for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
+ gem_execbuf_wr(fd, &execbuf);
+ fence[n] = execbuf.rsvd2 >> 32;
+ igt_assert(fence[n] != -1);
+ }
+
+ check_wait(fd, obj[1].handle, wait);
+
+ for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
+ igt_assert_eq(sync_fence_status(fence[n]), -EIO);
+ close(fence[n]);
+ }
+
+ igt_spin_batch_free(fd, hang);
+ igt_assert(i915_reset_control(true));
+ trigger_reset(fd);
+
+ gem_close(fd, obj[1].handle);
+ close(fd);
+ }
+}
+
+static void test_inflight_suspend(int fd)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[2];
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+ int fence[64]; /* conservative estimate of ring size */
+ igt_spin_t *hang;
+
+ fd = gem_reopen_driver(fd);
+ igt_require_gem(fd);
+ igt_require(gem_has_exec_fence(fd));
+ igt_require(i915_reset_control(false));
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].flags = EXEC_OBJECT_WRITE;
+ obj[1].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+
+ hang = spin_sync(fd, 0, 0);
+ obj[0].handle = hang->handle;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags = I915_EXEC_FENCE_OUT;
+
+ for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
+ gem_execbuf_wr(fd, &execbuf);
+ fence[n] = execbuf.rsvd2 >> 32;
+ igt_assert(fence[n] != -1);
+ }
+
+ igt_set_autoresume_delay(30);
+ igt_system_suspend_autoresume(SUSPEND_STATE_MEM, SUSPEND_TEST_NONE);
+
+ check_wait(fd, obj[1].handle, 10);
+
+ for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
+ igt_assert_eq(sync_fence_status(fence[n]), -EIO);
+ close(fence[n]);
+ }
+
+ igt_spin_batch_free(fd, hang);
+ igt_assert(i915_reset_control(true));
+ trigger_reset(fd);
+ close(fd);
+}
+
+static uint32_t context_create_safe(int i915)
+{
+ struct drm_i915_gem_context_param param;
+
+ memset(&param, 0, sizeof(param));
+
+ param.ctx_id = gem_context_create(i915);
+ param.param = I915_CONTEXT_PARAM_BANNABLE;
+ gem_context_set_param(i915, &param);
+
+ param.param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE;
+ param.value = 1;
+ gem_context_set_param(i915, &param);
+
+ return param.ctx_id;
+}
+
+static void test_inflight_contexts(int fd, unsigned int wait)
+{
+ int parent_fd = fd;
+ unsigned int engine;
+
+ igt_require_gem(fd);
+ igt_require(gem_has_exec_fence(fd));
+ gem_require_contexts(fd);
+
+ for_each_engine(parent_fd, engine) {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ igt_spin_t *hang;
+ uint32_t ctx[64];
+ int fence[64];
+
+ fd = gem_reopen_driver(parent_fd);
+ igt_require_gem(fd);
+
+ for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
+ ctx[n] = context_create_safe(fd);
+
+ gem_quiescent_gpu(fd);
+
+ igt_debug("Starting %s on engine '%s'\n", __func__, e__->name);
+ igt_require(i915_reset_control(false));
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].flags = EXEC_OBJECT_WRITE;
+ obj[1].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+
+ hang = spin_sync(fd, 0, engine);
+ obj[0].handle = hang->handle;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags = engine | I915_EXEC_FENCE_OUT;
+
+ for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
+ execbuf.rsvd1 = ctx[n];
+ gem_execbuf_wr(fd, &execbuf);
+ fence[n] = execbuf.rsvd2 >> 32;
+ igt_assert(fence[n] != -1);
+ }
+
+ check_wait(fd, obj[1].handle, wait);
+
+ for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
+ igt_assert_eq(sync_fence_status(fence[n]), -EIO);
+ close(fence[n]);
+ }
+
+ igt_spin_batch_free(fd, hang);
+ gem_close(fd, obj[1].handle);
+ igt_assert(i915_reset_control(true));
+ trigger_reset(fd);
+
+ for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
+ gem_context_destroy(fd, ctx[n]);
+
+ close(fd);
+ }
+}
+
+static void test_inflight_external(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ igt_spin_t *hang;
+ uint32_t fence;
+ IGT_CORK_FENCE(cork);
+
+ igt_require_sw_sync();
+ igt_require(gem_has_exec_fence(fd));
+
+ fd = gem_reopen_driver(fd);
+ igt_require_gem(fd);
+
+ fence = igt_cork_plug(&cork, fd);
+
+ igt_require(i915_reset_control(false));
+ hang = __spin_poll(fd, 0, 0);
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = I915_EXEC_FENCE_IN | I915_EXEC_FENCE_OUT;
+ execbuf.rsvd2 = (uint32_t)fence;
+
+ gem_execbuf_wr(fd, &execbuf);
+ close(fence);
+
+ fence = execbuf.rsvd2 >> 32;
+ igt_assert(fence != -1);
+
+ __spin_wait(fd, hang);
+ manual_hang(fd);
+
+ gem_sync(fd, hang->handle); /* wedged, with an unready batch */
+ igt_assert(!gem_bo_busy(fd, hang->handle));
+ igt_assert(gem_bo_busy(fd, obj.handle));
+ igt_cork_unplug(&cork); /* only now submit our batches */
+
+ igt_assert_eq(__gem_wait(fd, obj.handle, -1), 0);
+ igt_assert_eq(sync_fence_status(fence), -EIO);
+ close(fence);
+
+ igt_spin_batch_free(fd, hang);
+ igt_assert(i915_reset_control(true));
+ trigger_reset(fd);
+ close(fd);
+}
+
+static void test_inflight_internal(int fd, unsigned int wait)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[2];
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+ unsigned engine, nfence = 0;
+ int fences[16];
+ igt_spin_t *hang;
+
+ igt_require(gem_has_exec_fence(fd));
+
+ fd = gem_reopen_driver(fd);
+ igt_require_gem(fd);
+
+ igt_require(i915_reset_control(false));
+ hang = spin_sync(fd, 0, 0);
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = hang->handle;
+ obj[0].flags = EXEC_OBJECT_WRITE;
+ obj[1].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ for_each_engine(fd, engine) {
+ execbuf.flags = engine | I915_EXEC_FENCE_OUT;
+
+ gem_execbuf_wr(fd, &execbuf);
+
+ fences[nfence] = execbuf.rsvd2 >> 32;
+ igt_assert(fences[nfence] != -1);
+ nfence++;
+ }
+
+ check_wait(fd, obj[1].handle, wait);
+
+ while (nfence--) {
+ igt_assert_eq(sync_fence_status(fences[nfence]), -EIO);
+ close(fences[nfence]);
+ }
+
+ igt_spin_batch_free(fd, hang);
+ igt_assert(i915_reset_control(true));
+ trigger_reset(fd);
+ close(fd);
+}
+
+static void reset_stress(int fd,
+ uint32_t ctx0, unsigned int engine,
+ unsigned int flags)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = gem_create(fd, 4096)
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .flags = engine,
+ };
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ igt_until_timeout(5) {
+ uint32_t ctx = context_create_safe(fd);
+ igt_spin_t *hang;
+ unsigned int i;
+
+ gem_quiescent_gpu(fd);
+
+ igt_require(i915_reset_control(flags & TEST_WEDGE ?
+ false : true));
+
+ /*
+ * Start executing a spin batch with some queued batches
+ * against a different context after it.
+ */
+ hang = spin_sync(fd, ctx0, engine);
+
+ execbuf.rsvd1 = ctx;
+ for (i = 0; i < 10; i++)
+ gem_execbuf(fd, &execbuf);
+
+ execbuf.rsvd1 = ctx0;
+ for (i = 0; i < 10; i++)
+ gem_execbuf(fd, &execbuf);
+
+ /* Wedge after a small delay. */
+ check_wait(fd, obj.handle, 100e3);
+
+ /* Unwedge by forcing a reset. */
+ igt_assert(i915_reset_control(true));
+ trigger_reset(fd);
+
+ gem_quiescent_gpu(fd);
+
+ /*
+ * Verify that we are able to submit work after unwedging from
+ * both contexts.
+ */
+ execbuf.rsvd1 = ctx;
+ for (i = 0; i < 5; i++)
+ gem_execbuf(fd, &execbuf);
+
+ execbuf.rsvd1 = ctx0;
+ for (i = 0; i < 5; i++)
+ gem_execbuf(fd, &execbuf);
+
+ gem_sync(fd, obj.handle);
+ igt_spin_batch_free(fd, hang);
+ gem_context_destroy(fd, ctx);
+ }
+
+ gem_close(fd, obj.handle);
+}
+
+/*
+ * Verify that we can submit and execute work after unwedging the GPU.
+ */
+static void test_reset_stress(int fd, unsigned int flags)
+{
+ uint32_t ctx0 = context_create_safe(fd);
+ unsigned int engine;
+
+ for_each_engine(fd, engine)
+ reset_stress(fd, ctx0, engine, flags);
+
+ gem_context_destroy(fd, ctx0);
+}
+
+static int fd = -1;
+
+static void
+exit_handler(int sig)
+{
+ i915_reset_control(true);
+ igt_force_gpu_reset(fd);
+}
+
+igt_main
+{
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_device_drop_master(fd);
+
+ igt_require(i915_reset_control(true));
+ igt_force_gpu_reset(fd);
+ igt_install_exit_handler(exit_handler);
+
+ gem_submission_print_method(fd);
+ igt_require_gem(fd);
+
+ igt_allow_hang(fd, 0, 0);
+ }
+
+ igt_subtest("throttle")
+ test_throttle(fd);
+
+ igt_subtest("execbuf")
+ test_execbuf(fd);
+
+ igt_subtest("banned")
+ test_banned(fd);
+
+ igt_subtest("suspend")
+ test_suspend(fd, SUSPEND_STATE_MEM);
+
+ igt_subtest("hibernate")
+ test_suspend(fd, SUSPEND_STATE_DISK);
+
+ igt_subtest("in-flight-external")
+ test_inflight_external(fd);
+
+ igt_subtest("in-flight-suspend")
+ test_inflight_suspend(fd);
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(gem_has_contexts(fd));
+ }
+
+ igt_subtest("reset-stress")
+ test_reset_stress(fd, 0);
+
+ igt_subtest("unwedge-stress")
+ test_reset_stress(fd, TEST_WEDGE);
+ }
+
+ igt_subtest_group {
+ const struct {
+ unsigned int wait;
+ const char *name;
+ } waits[] = {
+ { .wait = 0, .name = "immediate" },
+ { .wait = 1, .name = "1us" },
+ { .wait = 10000, .name = "10ms" },
+ };
+ unsigned int i;
+
+ for (i = 0; i < sizeof(waits) / sizeof(waits[0]); i++) {
+ igt_subtest_f("wait-%s", waits[i].name)
+ test_wait(fd, 0, waits[i].wait);
+
+ igt_subtest_f("wait-wedge-%s", waits[i].name)
+ test_wait(fd, TEST_WEDGE, waits[i].wait);
+
+ igt_subtest_f("in-flight-%s", waits[i].name)
+ test_inflight(fd, waits[i].wait);
+
+ igt_subtest_f("in-flight-contexts-%s", waits[i].name)
+ test_inflight_contexts(fd, waits[i].wait);
+
+ igt_subtest_f("in-flight-internal-%s", waits[i].name) {
+ igt_skip_on(gem_has_semaphores(fd));
+ test_inflight_internal(fd, waits[i].wait);
+ }
+ }
+ }
+}
diff --git a/tests/i915/gem_evict_alignment.c b/tests/i915/gem_evict_alignment.c
new file mode 100644
index 00000000..140d5583
--- /dev/null
+++ b/tests/i915/gem_evict_alignment.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright © 2011,2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/*
+ * Testcase: run a couple of big batches to force the unbind-on-misalignment code path.
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+
+IGT_TEST_DESCRIPTION("Run a couple of big batches to force the unbind on"
+ " misalignment code.");
+
+#define HEIGHT 256
+#define WIDTH 1024
+
+static void
+copy(int fd, uint32_t dst, uint32_t src, uint32_t *all_bo,
+ uint64_t n_bo, uint64_t alignment, int error)
+{
+ uint32_t batch[12];
+ struct drm_i915_gem_relocation_entry reloc[2];
+ struct drm_i915_gem_exec_object2 *obj;
+ struct drm_i915_gem_execbuffer2 exec;
+ uint32_t handle;
+ int n, i=0;
+
+ batch[i++] = (XY_SRC_COPY_BLT_CMD |
+ XY_SRC_COPY_BLT_WRITE_ALPHA |
+ XY_SRC_COPY_BLT_WRITE_RGB | 6);
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i - 1] += 2;
+ batch[i++] = (3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ WIDTH*4;
+ batch[i++] = 0; /* dst x1,y1 */
+ batch[i++] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
+ batch[i++] = 0; /* dst reloc */
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i++] = 0; /* FIXME */
+ batch[i++] = 0; /* src x1,y1 */
+ batch[i++] = WIDTH*4;
+ batch[i++] = 0; /* src reloc */
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i++] = 0; /* FIXME */
+ batch[i++] = MI_BATCH_BUFFER_END;
+ batch[i++] = MI_NOOP;
+
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, batch, sizeof(batch));
+
+ reloc[0].target_handle = dst;
+ reloc[0].delta = 0;
+ reloc[0].offset = 4 * sizeof(batch[0]);
+ reloc[0].presumed_offset = 0;
+ reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+
+ reloc[1].target_handle = src;
+ reloc[1].delta = 0;
+ reloc[1].offset = 7 * sizeof(batch[0]);
+ reloc[1].presumed_offset = 0;
+ reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[1].write_domain = 0;
+
+ obj = calloc(n_bo + 1, sizeof(*obj));
+ for (n = 0; n < n_bo; n++) {
+ obj[n].handle = all_bo[n];
+ obj[n].alignment = alignment;
+ }
+ obj[n].handle = handle;
+ obj[n].relocation_count = 2;
+ obj[n].relocs_ptr = to_user_pointer(reloc);
+
+ exec.buffers_ptr = to_user_pointer(obj);
+ exec.buffer_count = n_bo + 1;
+ exec.batch_start_offset = 0;
+ exec.batch_len = i * 4;
+ exec.DR1 = exec.DR4 = 0;
+ exec.num_cliprects = 0;
+ exec.cliprects_ptr = 0;
+ exec.flags = HAS_BLT_RING(intel_get_drm_devid(fd)) ? I915_EXEC_BLT : 0;
+ i915_execbuffer2_set_context_id(exec, 0);
+ exec.rsvd2 = 0;
+
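+ /* The caller tells us whether binding with this alignment should succeed (error == 0) or fail with ENOSPC */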
+ igt_assert_eq(__gem_execbuf(fd, &exec), -error);
+
+ gem_close(fd, handle);
+ free(obj);
+}
+
+static void minor_evictions(int fd, uint64_t size, uint64_t count)
+{
+ uint32_t *bo, *sel;
+ uint64_t n, m, alignment;
+ int pass, fail;
+
+ intel_require_memory(2 * count, size, CHECK_RAM);
+
+ bo = malloc(3*count*sizeof(*bo));
+ igt_assert(bo);
+
+ for (n = 0; n < 2*count; n++)
+ bo[n] = gem_create(fd, size);
+
+ sel = bo + n;
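+ /*
+ * For each alignment, blit between random subsets that fit in the
+ * aperture, then overcommit with every bo and expect ENOSPC.
+ */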
+ for (alignment = m = 4096; alignment <= size; alignment <<= 1) {
+ for (fail = 0; fail < 10; fail++) {
+ for (pass = 0; pass < 100; pass++) {
+ for (n = 0; n < count; n++, m += 7)
+ sel[n] = bo[m%(2*count)];
+ copy(fd, sel[0], sel[1], sel, count, alignment, 0);
+ }
+ copy(fd, bo[0], bo[0], bo, 2*count, alignment, ENOSPC);
+ }
+ }
+
+ for (n = 0; n < 2*count; n++)
+ gem_close(fd, bo[n]);
+ free(bo);
+}
+
+static void major_evictions(int fd, uint64_t size, uint64_t count)
+{
+ uint64_t n, m, alignment, max;
+ int loop;
+ uint32_t *bo;
+
+ intel_require_memory(count, size, CHECK_RAM);
+
+ bo = malloc(count*sizeof(*bo));
+ igt_assert(bo);
+
+ for (n = 0; n < count; n++)
+ bo[n] = gem_create(fd, size);
+
+ max = gem_aperture_size(fd) - size;
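+ /* The caller sizes each bo at ~3/4 of the aperture, so every copy must evict its predecessor */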
+ for (alignment = m = 4096; alignment < max; alignment <<= 1) {
+ for (loop = 0; loop < 100; loop++, m += 17) {
+ n = m % count;
+ copy(fd, bo[n], bo[n], &bo[n], 1, alignment, 0);
+ }
+ }
+
+ for (n = 0; n < count; n++)
+ gem_close(fd, bo[n]);
+ free(bo);
+}
+
+#define MAX_32b ((1ull << 32) - 4096)
+
+igt_main
+{
+ uint64_t size, count;
+ int fd = -1;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ igt_fork_hang_detector(fd);
+ }
+
+ igt_subtest("minor-normal") {
+ size = 1024 * 1024;
+ count = gem_aperture_size(fd);
+ if (count >> 32)
+ count = MAX_32b;
+ count = 3 * count / size / 4;
+ minor_evictions(fd, size, count);
+ }
+
+ igt_subtest("major-normal") {
+ size = gem_aperture_size(fd);
+ if (size >> 32)
+ size = MAX_32b;
+ size = 3 * size / 4;
+ count = 4;
+ major_evictions(fd, size, count);
+ }
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ }
+
+ igt_fork_signal_helper();
+ igt_subtest("minor-interruptible") {
+ size = 1024 * 1024;
+ count = gem_aperture_size(fd);
+ if (count >> 32)
+ count = MAX_32b;
+ count = 3 * count / size / 4;
+ minor_evictions(fd, size, count);
+ }
+
+ igt_subtest("major-interruptible") {
+ size = gem_aperture_size(fd);
+ if (size >> 32)
+ size = MAX_32b;
+ size = 3 * size / 4;
+ count = 4;
+ major_evictions(fd, size, count);
+ }
+
+ igt_subtest("minor-hang") {
+ igt_fork_hang_helper();
+ size = 1024 * 1024;
+ count = gem_aperture_size(fd);
+ if (count >> 32)
+ count = MAX_32b;
+ count = 3 * count / size / 4;
+ minor_evictions(fd, size, count);
+ }
+
+ igt_subtest("major-hang") {
+ size = gem_aperture_size(fd);
+ if (size >> 32)
+ size = MAX_32b;
+ size = 3 * size / 4;
+ count = 4;
+ major_evictions(fd, size, count);
+ }
+ igt_stop_signal_helper();
+
+ igt_fixture {
+ igt_stop_hang_helper();
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_evict_everything.c b/tests/i915/gem_evict_everything.c
new file mode 100644
index 00000000..f3607648
--- /dev/null
+++ b/tests/i915/gem_evict_everything.c
@@ -0,0 +1,291 @@
+/*
+ * Copyright © 2011,2012,2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/*
+ * Testcase: run a couple of big batches to force the eviction code.
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include "drm.h"
+
+#include "eviction_common.c"
+
+IGT_TEST_DESCRIPTION("Run a couple of big batches to force the eviction"
+ " code.");
+
+#define HEIGHT 256
+#define WIDTH 1024
+
+static int
+copy(int fd, uint32_t dst, uint32_t src, uint32_t *all_bo, int n_bo)
+{
+ uint32_t batch[12];
+ struct drm_i915_gem_relocation_entry reloc[2];
+ struct drm_i915_gem_exec_object2 *obj;
+ struct drm_i915_gem_execbuffer2 exec;
+ uint32_t handle;
+ int n, ret, i=0;
+
+ batch[i++] = (XY_SRC_COPY_BLT_CMD |
+ XY_SRC_COPY_BLT_WRITE_ALPHA |
+ XY_SRC_COPY_BLT_WRITE_RGB | 6);
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i - 1] += 2;
+ batch[i++] = (3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ WIDTH*4;
+ batch[i++] = 0; /* dst x1,y1 */
+ batch[i++] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
+ batch[i++] = 0; /* dst reloc */
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i++] = 0; /* FIXME */
+ batch[i++] = 0; /* src x1,y1 */
+ batch[i++] = WIDTH*4;
+ batch[i++] = 0; /* src reloc */
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i++] = 0; /* FIXME */
+ batch[i++] = MI_BATCH_BUFFER_END;
+ batch[i++] = MI_NOOP;
+
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, batch, sizeof(batch));
+
+ reloc[0].target_handle = dst;
+ reloc[0].delta = 0;
+ reloc[0].offset = 4 * sizeof(batch[0]);
+ reloc[0].presumed_offset = 0;
+ reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+
+ reloc[1].target_handle = src;
+ reloc[1].delta = 0;
+ reloc[1].offset = 7 * sizeof(batch[0]);
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ reloc[1].offset += sizeof(batch[0]);
+ reloc[1].presumed_offset = 0;
+ reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[1].write_domain = 0;
+
+ obj = calloc(n_bo + 1, sizeof(*obj));
+ for (n = 0; n < n_bo; n++)
+ obj[n].handle = all_bo[n];
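+	/* after the loop n == n_bo, so the final slot holds the batch and its two relocations */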
+ obj[n].handle = handle;
+ obj[n].relocation_count = 2;
+ obj[n].relocs_ptr = to_user_pointer(reloc);
+
+ memset(&exec, 0, sizeof(exec));
+ exec.buffers_ptr = to_user_pointer(obj);
+ exec.buffer_count = n_bo + 1;
+ if (HAS_BLT_RING(intel_get_drm_devid(fd)))
+ exec.flags |= I915_EXEC_BLT;
+ ret = __gem_execbuf(fd, &exec);
+
+ gem_close(fd, handle);
+ free(obj);
+
+ return ret;
+}
+
+static void clear(int fd, uint32_t handle, uint64_t size)
+{
+ void *base = gem_mmap__cpu(fd, handle, 0, size, PROT_WRITE);
+ memset(base, 0, size);
+ munmap(base, size);
+}
+
+static struct igt_eviction_test_ops fault_ops = {
+ .create = gem_create,
+ .close = gem_close,
+ .copy = copy,
+ .clear = clear,
+};
+
+static void test_forking_evictions(int fd, uint64_t size, uint64_t count,
+ unsigned flags)
+{
+ uint64_t trash_count;
+
+ trash_count = intel_get_total_ram_mb() * 11 / 10;
+ intel_require_memory(trash_count, size, CHECK_RAM | CHECK_SWAP);
+
+ forking_evictions(fd, &fault_ops, size, count, trash_count, flags);
+}
+
+static void test_mlocked_evictions(int fd, uint64_t size, uint64_t count)
+{
+ mlocked_evictions(fd, &fault_ops, size, count);
+}
+
+static void test_swapping_evictions(int fd, uint64_t size, uint64_t count)
+{
+ int trash_count;
+
+ trash_count = intel_get_total_ram_mb() * 11 / 10;
+ intel_require_memory(trash_count, size, CHECK_RAM | CHECK_SWAP);
+
+ swapping_evictions(fd, &fault_ops, size, count, trash_count);
+}
+
+static void test_minor_evictions(int fd, uint64_t size, uint64_t count)
+{
+ minor_evictions(fd, &fault_ops, size, count);
+}
+
+static void test_major_evictions(int fd, uint64_t size, uint64_t count)
+{
+ major_evictions(fd, &fault_ops, size, count);
+}
+
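+/* cap aperture-derived sizes just below 4GiB when the aperture exceeds the 32-bit range */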
+#define MAX_32b ((1ull << 32) - 4096)
+
+igt_main
+{
+ uint64_t size, count;
+ int fd;
+
+ size = count = 0;
+ fd = -1;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
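+		/* default: 1 MiB objects covering ~3/4 of the (32-bit capped) aperture */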
+ size = 1024 * 1024;
+ count = gem_aperture_size(fd);
+ if (count >> 32)
+ count = MAX_32b;
+ count = 3 * count / size / 4;
+
+ igt_fork_hang_detector(fd);
+ }
+
+ for (unsigned flags = 0; flags < ALL_FORKING_EVICTIONS + 1; flags++) {
+ igt_subtest_f("forked%s%s%s-%s",
+ flags & FORKING_EVICTIONS_SWAPPING ? "-swapping" : "",
+ flags & FORKING_EVICTIONS_DUP_DRMFD ? "-multifd" : "",
+ flags & FORKING_EVICTIONS_MEMORY_PRESSURE ?
+ "-mempressure" : "",
+ flags & FORKING_EVICTIONS_INTERRUPTIBLE ?
+ "interruptible" : "normal") {
+ test_forking_evictions(fd, size, count, flags);
+ }
+ }
+
+ igt_subtest("mlocked-normal")
+ test_mlocked_evictions(fd, size, count);
+
+ igt_subtest("swapping-normal")
+ test_swapping_evictions(fd, size, count);
+
+ igt_subtest("minor-normal")
+ test_minor_evictions(fd, size, count);
+
+ igt_subtest("major-normal") {
+ size = gem_aperture_size(fd);
+ if (size >> 32)
+ size = MAX_32b;
+ size = 3 * size / 4;
+ count = 4;
+ test_major_evictions(fd, size, count);
+ }
+
+ igt_fixture {
+ size = 1024 * 1024;
+ count = gem_aperture_size(fd);
+ if (count >> 32)
+ count = MAX_32b;
+ count = 3 * count / size / 4;
+ }
+
+ igt_fork_signal_helper();
+
+ igt_subtest("mlocked-interruptible")
+ test_mlocked_evictions(fd, size, count);
+
+ igt_subtest("swapping-interruptible")
+ test_swapping_evictions(fd, size, count);
+
+ igt_subtest("minor-interruptible")
+ test_minor_evictions(fd, size, count);
+
+ igt_subtest("major-interruptible") {
+ size = gem_aperture_size(fd);
+ if (size >> 32)
+ size = MAX_32b;
+ size = 3 * size / 4;
+ count = 4;
+ test_major_evictions(fd, size, count);
+ }
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ igt_fork_hang_helper();
+
+ size = 1024 * 1024;
+ count = gem_aperture_size(fd);
+ if (count >> 32)
+ count = MAX_32b;
+ count = 3 * count / size / 4;
+ }
+
+ igt_subtest("mlocked-hang")
+ test_mlocked_evictions(fd, size, count);
+
+ igt_subtest("swapping-hang")
+ test_swapping_evictions(fd, size, count);
+
+ igt_subtest("minor-hang")
+ test_minor_evictions(fd, size, count);
+
+ igt_subtest("major-hang") {
+ size = gem_aperture_size(fd);
+ if (size >> 32)
+ size = MAX_32b;
+ size = 3 * size / 4;
+ count = 4;
+ test_major_evictions(fd, size, count);
+ }
+
+ igt_stop_signal_helper();
+
+ igt_fixture {
+ igt_stop_hang_helper();
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_exec_alignment.c b/tests/i915/gem_exec_alignment.c
new file mode 100644
index 00000000..a10571c9
--- /dev/null
+++ b/tests/i915/gem_exec_alignment.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/* Exercises the basic execbuffer using object alignments */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION("Exercises the basic execbuffer using object alignments");
+
+static uint32_t find_last_bit(uint64_t x)
+{
+ uint32_t i = 0;
+ while (x) {
+ x >>= 1;
+ i++;
+ }
+ return i;
+}
+
+static uint32_t file_max(void)
+{
+ static uint32_t max;
+ if (max == 0) {
+ FILE *file = fopen("/proc/sys/fs/file-max", "r");
+ max = 80000;
+ if (file) {
+			igt_assert(fscanf(file, "%u", &max) == 1);
+ fclose(file);
+ }
+ max /= 2;
+ }
+ return max;
+}
+
+static void many(int fd)
+{
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 *execobj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint64_t gtt_size, ram_size;
+ uint64_t alignment, max_alignment, count, i;
+
+ gtt_size = gem_aperture_size(fd);
+ if (!gem_uses_full_ppgtt(fd))
+ gtt_size /= 2; /* We have to *share* our GTT! */
+ ram_size = intel_get_total_ram_mb();
+ ram_size *= 1024 * 1024;
+ count = ram_size / 4096;
+ if (count > file_max()) /* vfs cap */
+ count = file_max();
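+	/* pick the largest power-of-two alignment at which this many objects still fit, then size the count to fill half the GTT at that alignment */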
+ max_alignment = find_last_bit(gtt_size / count);
+ if (max_alignment <= 13)
+ max_alignment = 4096;
+ else
+ max_alignment = 1ull << (max_alignment - 1);
+ count = gtt_size / max_alignment / 2;
+
+ igt_info("gtt_size=%lld MiB, max-alignment=%lld, count=%lld\n",
+ (long long)gtt_size/1024/1024,
+ (long long)max_alignment,
+ (long long)count);
+ intel_require_memory(count, 4096, CHECK_RAM);
+
+ execobj = calloc(sizeof(*execobj), count + 1);
+ igt_assert(execobj);
+
+ for (i = 0; i < count; i++) {
+ execobj[i].handle = gem_create(fd, 4096);
+ if ((gtt_size-1) >> 32)
+ execobj[i].flags = 1<<3; /* EXEC_OBJECT_SUPPORTS_48B_ADDRESS */
+ }
+ execobj[i].handle = gem_create(fd, 4096);
+ if ((gtt_size-1) >> 32)
+ execobj[i].flags = 1<<3; /* EXEC_OBJECT_SUPPORTS_48B_ADDRESS */
+ gem_write(fd, execobj[i].handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(execobj);
+ execbuf.buffer_count = count + 1;
+ igt_require(__gem_execbuf(fd, &execbuf) == 0);
+
+ for (alignment = 4096; alignment < gtt_size; alignment <<= 1) {
+ for (i = 0; i < count; i++)
+ execobj[i].alignment = alignment;
+ if (alignment > max_alignment) {
+ uint64_t factor = alignment / max_alignment;
+ execbuf.buffer_count = 2*count / factor;
+ execbuf.buffers_ptr =
+ to_user_pointer(execobj + count - execbuf.buffer_count + 1);
+ }
+
+ igt_debug("testing %lld x alignment=%#llx [%db]\n",
+ (long long)execbuf.buffer_count - 1,
+ (long long)alignment,
+ find_last_bit(alignment)-1);
+ gem_execbuf(fd, &execbuf);
+ for(i = count - execbuf.buffer_count + 1; i < count; i++) {
+ igt_assert_eq_u64(execobj[i].alignment, alignment);
+ igt_assert_eq_u64(execobj[i].offset % alignment, 0);
+ }
+ }
+
+ for (i = 0; i < count; i++)
+ gem_close(fd, execobj[i].handle);
+ gem_close(fd, execobj[i].handle);
+ free(execobj);
+}
+
+static void single(int fd)
+{
+ struct drm_i915_gem_exec_object2 execobj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t batch = MI_BATCH_BUFFER_END;
+ uint64_t gtt_size;
+ int non_pot;
+
+ memset(&execobj, 0, sizeof(execobj));
+ execobj.handle = gem_create(fd, 4096);
+ execobj.flags = 1<<3; /* EXEC_OBJECT_SUPPORTS_48B_ADDRESS */
+ gem_write(fd, execobj.handle, 0, &batch, sizeof(batch));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&execobj);
+ execbuf.buffer_count = 1;
+
+ gtt_size = gem_aperture_size(fd);
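+	/* if the kernel rejects EXEC_OBJECT_SUPPORTS_48B_ADDRESS, fall back to the legacy 4GiB limit */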
+ if (__gem_execbuf(fd, &execbuf)) {
+ execobj.flags = 0;
+ gtt_size = 1ull << 32;
+ gem_execbuf(fd, &execbuf);
+ }
+
+ execobj.alignment = 3*4096;
+ non_pot = __gem_execbuf(fd, &execbuf) == 0;
+ igt_debug("execbuffer() accepts non-power-of-two alignment? %s\n",
+ non_pot ? "yes" : "no");
+
+ for (execobj.alignment = 4096;
+ execobj.alignment <= 64<<20;
+ execobj.alignment += 4096) {
+ if (!non_pot && execobj.alignment & -execobj.alignment)
+ continue;
+
+ igt_debug("starting offset: %#llx, next alignment: %#llx\n",
+ (long long)execobj.offset,
+ (long long)execobj.alignment);
+ gem_execbuf(fd, &execbuf);
+ igt_assert_eq_u64(execobj.offset % execobj.alignment, 0);
+ }
+
+ for (execobj.alignment = 4096;
+ execobj.alignment < gtt_size;
+ execobj.alignment <<= 1) {
+ igt_debug("starting offset: %#llx, next alignment: %#llx [%db]\n",
+ (long long)execobj.offset,
+ (long long)execobj.alignment,
+ find_last_bit(execobj.alignment)-1);
+ gem_execbuf(fd, &execbuf);
+ igt_assert_eq_u64(execobj.offset % execobj.alignment, 0);
+ }
+
+ gem_close(fd, execobj.handle);
+}
+
+igt_main
+{
+ int fd = -1;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ }
+
+ igt_subtest("single") /* basic! */
+ single(fd);
+ igt_subtest("many")
+ many(fd);
+}
diff --git a/tests/i915/gem_exec_async.c b/tests/i915/gem_exec_async.c
new file mode 100644
index 00000000..9a06af7e
--- /dev/null
+++ b/tests/i915/gem_exec_async.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+
+#define LOCAL_OBJECT_ASYNC (1 << 6)
+#define LOCAL_PARAM_HAS_EXEC_ASYNC 43
+
+IGT_TEST_DESCRIPTION("Check that we can issue concurrent writes across the engines.");
+
+static void store_dword(int fd, unsigned ring,
+ uint32_t target, uint32_t offset, uint32_t value)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t batch[16];
+ int i;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags = ring;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = target;
+ obj[0].flags = LOCAL_OBJECT_ASYNC;
+ obj[1].handle = gem_create(fd, 4096);
+
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.target_handle = obj[0].handle;
+ reloc.presumed_offset = 0;
+ reloc.offset = sizeof(uint32_t);
+ reloc.delta = offset;
+ reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+ obj[1].relocs_ptr = to_user_pointer(&reloc);
+ obj[1].relocation_count = 1;
+
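+	/* emit MI_STORE_DWORD_IMM; the address field is 64-bit (two dwords) on gen8+, 32-bit otherwise, and is patched via the relocation */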
+ i = 0;
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = offset;
+ batch[++i] = 0;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = offset;
+ reloc.offset += sizeof(uint32_t);
+ } else {
+ batch[i]--;
+ batch[++i] = offset;
+ }
+ batch[++i] = value;
+ batch[++i] = MI_BATCH_BUFFER_END;
+ gem_write(fd, obj[1].handle, 0, batch, sizeof(batch));
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, obj[1].handle);
+}
+
+static void one(int fd, unsigned ring, uint32_t flags)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 obj[2];
+#define SCRATCH 0
+#define BATCH 1
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ unsigned int other;
+ uint32_t *batch;
+ int i;
+
+ /* On the target ring, create a looping batch that marks
+	 * the scratch for write. Then on the other rings try to
+	 * write into that target. If it blocks, we hang the GPU...
+ */
+
+ memset(obj, 0, sizeof(obj));
+ obj[SCRATCH].handle = gem_create(fd, 4096);
+
+ obj[BATCH].handle = gem_create(fd, 4096);
+ obj[BATCH].relocs_ptr = to_user_pointer(&reloc);
+ obj[BATCH].relocation_count = 1;
+
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.target_handle = obj[BATCH].handle; /* recurse */
+ reloc.presumed_offset = 0;
+ reloc.offset = sizeof(uint32_t);
+ reloc.delta = 0;
+ reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
+ reloc.write_domain = 0;
+
+ batch = gem_mmap__wc(fd, obj[BATCH].handle, 0, 4096, PROT_WRITE);
+ gem_set_domain(fd, obj[BATCH].handle,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+ i = 0;
+ batch[i] = MI_BATCH_BUFFER_START;
+ if (gen >= 8) {
+ batch[i] |= 1 << 8 | 1;
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else if (gen >= 6) {
+ batch[i] |= 1 << 8;
+ batch[++i] = 0;
+ } else {
+ batch[i] |= 2 << 6;
+ batch[++i] = 0;
+ if (gen < 4) {
+ batch[i] |= 1;
+ reloc.delta = 1;
+ }
+ }
+ i++;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags = ring | flags;
+ igt_require(__gem_execbuf(fd, &execbuf) == 0);
+ gem_close(fd, obj[BATCH].handle);
+
+ i = 0;
+ for_each_physical_engine(fd, other) {
+ if (other == ring)
+ continue;
+
+ if (!gem_can_store_dword(fd, other))
+ continue;
+
+ store_dword(fd, other, obj[SCRATCH].handle, 4*i, i);
+ i++;
+ }
+
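+	/* rewrite the self-looping batch into a terminator so the target ring can retire */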
+ *batch = MI_BATCH_BUFFER_END;
+ __sync_synchronize();
+ munmap(batch, 4096);
+
+ batch = gem_mmap__wc(fd, obj[SCRATCH].handle, 0, 4096, PROT_READ);
+ /* The kernel only tracks the last *submitted* write (but all reads),
+ * so to ensure *all* rings are flushed, we flush all reads even
+ * though we only need read access for ourselves.
+ */
+ gem_set_domain(fd, obj[SCRATCH].handle,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ gem_close(fd, obj[SCRATCH].handle);
+ while (i--)
+ igt_assert_eq_u32(batch[i], i);
+ munmap(batch, 4096);
+}
+
+static bool has_async_execbuf(int fd)
+{
+ drm_i915_getparam_t gp;
+ int async = -1;
+
+ gp.param = LOCAL_PARAM_HAS_EXEC_ASYNC;
+ gp.value = &async;
+ drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+
+ return async > 0;
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ int fd = -1;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver_master(DRIVER_INTEL);
+ igt_require_gem(fd);
+ gem_require_mmap_wc(fd);
+ igt_require(has_async_execbuf(fd));
+ igt_require(gem_can_store_dword(fd, 0));
+ igt_fork_hang_detector(fd);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ /* default exec-id is purely symbolic */
+ if (e->exec_id == 0)
+ continue;
+
+ igt_subtest_f("concurrent-writes-%s", e->name) {
+ igt_require(gem_ring_has_physical_engine(fd, e->exec_id | e->flags));
+ igt_require(gem_can_store_dword(fd, e->exec_id | e->flags));
+ one(fd, e->exec_id, e->flags);
+ }
+ }
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_exec_await.c b/tests/i915/gem_exec_await.c
new file mode 100644
index 00000000..5cfeb8ec
--- /dev/null
+++ b/tests/i915/gem_exec_await.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_rand.h"
+#include "igt_sysfs.h"
+#include "igt_vgem.h"
+#include "i915/gem_ring.h"
+
+#include <sys/ioctl.h>
+#include <sys/signal.h>
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define LOCAL_I915_EXEC_BSD_SHIFT (13)
+#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define ENGINE_FLAGS (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+static double elapsed(const struct timespec *start, const struct timespec *end)
+{
+ return ((end->tv_sec - start->tv_sec) +
+ (end->tv_nsec - start->tv_nsec)*1e-9);
+}
+
+static void xchg_obj(void *array, unsigned i, unsigned j)
+{
+ struct drm_i915_gem_exec_object2 *obj = array;
+ uint64_t tmp;
+
+ tmp = obj[i].handle;
+ obj[i].handle = obj[j].handle;
+ obj[j].handle = tmp;
+
+ tmp = obj[i].offset;
+ obj[i].offset = obj[j].offset;
+ obj[j].offset = tmp;
+}
+
+#define CONTEXTS 0x1
+static void wide(int fd, int ring_size, int timeout, unsigned int flags)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct {
+ struct drm_i915_gem_exec_object2 *obj;
+ struct drm_i915_gem_exec_object2 exec[2];
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t *cmd;
+ } *exec;
+ struct drm_i915_gem_exec_object2 *obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ unsigned engines[16];
+ unsigned nengine, engine;
+ unsigned long count;
+ double time;
+
+ nengine = 0;
+ for_each_physical_engine(fd, engine)
+ engines[nengine++] = engine;
+ igt_require(nengine);
+
+ exec = calloc(nengine, sizeof(*exec));
+ igt_assert(exec);
+
+ intel_require_memory(nengine*(2 + ring_size), 4096, CHECK_RAM);
+ obj = calloc(nengine*ring_size + 1, sizeof(*obj));
+ igt_assert(obj);
+
+ for (unsigned e = 0; e < nengine; e++) {
+ exec[e].obj = calloc(ring_size, sizeof(*exec[e].obj));
+ igt_assert(exec[e].obj);
+ for (unsigned n = 0; n < ring_size; n++) {
+ exec[e].obj[n].handle = gem_create(fd, 4096);
+ exec[e].obj[n].flags = EXEC_OBJECT_WRITE;
+
+ obj[e*ring_size + n].handle = exec[e].obj[n].handle;
+ }
+
+ exec[e].execbuf.buffers_ptr = to_user_pointer(exec[e].exec);
+ exec[e].execbuf.buffer_count = 1;
+ exec[e].execbuf.flags = (engines[e] |
+ LOCAL_I915_EXEC_NO_RELOC |
+ LOCAL_I915_EXEC_HANDLE_LUT);
+
+ if (flags & CONTEXTS) {
+ exec[e].execbuf.rsvd1 = gem_context_create(fd);
+ }
+
+ exec[e].exec[0].handle = gem_create(fd, 4096);
+ exec[e].cmd = gem_mmap__wc(fd, exec[e].exec[0].handle,
+ 0, 4096, PROT_WRITE);
+
+ gem_set_domain(fd, exec[e].exec[0].handle,
+ I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+ exec[e].cmd[0] = MI_BATCH_BUFFER_END;
+
+ gem_execbuf(fd, &exec[e].execbuf);
+ exec[e].exec[1] = exec[e].exec[0];
+ exec[e].execbuf.buffer_count = 2;
+
+ exec[e].reloc.target_handle = 1; /* recurse */
+ exec[e].reloc.offset = sizeof(uint32_t);
+ exec[e].reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
+ if (gen < 4)
+ exec[e].reloc.delta = 1;
+
+ exec[e].exec[1].relocs_ptr = to_user_pointer(&exec[e].reloc);
+ exec[e].exec[1].relocation_count = 1;
+ }
+ obj[nengine*ring_size].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[nengine*ring_size].handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj[nengine*ring_size]);
+ execbuf.buffer_count = 1;
+ gem_execbuf(fd, &execbuf); /* tag the object as a batch in the GTT */
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = nengine*ring_size + 1;
+
+ intel_detect_and_clear_missed_interrupts(fd);
+
+ time = 0;
+ count = 0;
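+	/* each pass: keep every engine busy with a self-spinning batch that marks many objects as written, then time how long execbuf takes when it must order against all of them */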
+ igt_until_timeout(timeout) {
+ struct timespec start, now;
+ for (unsigned e = 0; e < nengine; e++) {
+ uint64_t address;
+ int i;
+
+ if (flags & CONTEXTS) {
+ gem_context_destroy(fd, exec[e].execbuf.rsvd1);
+ exec[e].execbuf.rsvd1 = gem_context_create(fd);
+ }
+
+ exec[e].reloc.presumed_offset = exec[e].exec[1].offset;
+ address = (exec[e].reloc.presumed_offset +
+ exec[e].reloc.delta);
+ gem_set_domain(fd, exec[e].exec[1].handle,
+ I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+
+ i = 0;
+ exec[e].cmd[i] = MI_BATCH_BUFFER_START;
+ if (gen >= 8) {
+ exec[e].cmd[i] |= 1 << 8 | 1;
+ exec[e].cmd[++i] = address;
+ exec[e].cmd[++i] = address >> 32;
+ } else if (gen >= 6) {
+ exec[e].cmd[i] |= 1 << 8;
+ exec[e].cmd[++i] = address;
+ } else {
+ exec[e].cmd[i] |= 2 << 6;
+ exec[e].cmd[++i] = address;
+ }
+
+ exec[e].exec[0] = obj[nengine*ring_size];
+ gem_execbuf(fd, &exec[e].execbuf);
+
+ for (unsigned n = 0; n < ring_size; n++) {
+ exec[e].exec[0] = exec[e].obj[n];
+ gem_execbuf(fd, &exec[e].execbuf);
+ exec[e].obj[n].offset = exec[e].exec[0].offset;
+ }
+ }
+
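+		/* shuffle the shared object list so each timed pass presents the buffers in a different order */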
+ igt_permute_array(obj, nengine*ring_size, xchg_obj);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ for (unsigned e = 0; e < nengine; e++) {
+ execbuf.flags = (engines[e] |
+ LOCAL_I915_EXEC_NO_RELOC |
+ LOCAL_I915_EXEC_HANDLE_LUT);
+ gem_execbuf(fd, &execbuf);
+ }
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ time += elapsed(&start, &now);
+ count += nengine;
+
+ for (unsigned e = 0; e < nengine; e++)
+ exec[e].cmd[0] = MI_BATCH_BUFFER_END;
+ __sync_synchronize();
+ }
+
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+
+ igt_info("%s: %'lu cycles: %.3fus\n",
+ __func__, count, time*1e6 / count);
+
+ gem_close(fd, obj[nengine*ring_size].handle);
+ free(obj);
+
+ for (unsigned e = 0; e < nengine; e++) {
+ if (flags & CONTEXTS)
+ gem_context_destroy(fd, exec[e].execbuf.rsvd1);
+
+ for (unsigned n = 0; n < ring_size; n++)
+ gem_close(fd, exec[e].obj[n].handle);
+ free(exec[e].obj);
+
+ munmap(exec[e].cmd, 4096);
+ gem_close(fd, exec[e].exec[1].handle);
+ }
+ free(exec);
+}
+
+igt_main
+{
+ int ring_size = 0;
+ int device = -1;
+
+ igt_fixture {
+
+ device = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(device);
+ gem_submission_print_method(device);
+
+ ring_size = gem_measure_ring_inflight(device, ALL_ENGINES, 0) - 10;
+ if (!gem_has_execlists(device))
+ ring_size /= 2;
+ igt_info("Ring size: %d batches\n", ring_size);
+ igt_require(ring_size > 0);
+
+ igt_fork_hang_detector(device);
+ }
+
+ igt_subtest("wide-all")
+ wide(device, ring_size, 20, 0);
+
+ igt_subtest("wide-contexts") {
+ gem_require_contexts(device);
+ wide(device, ring_size, 20, CONTEXTS);
+ }
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ close(device);
+ }
+}
diff --git a/tests/i915/gem_exec_bad_domains.c b/tests/i915/gem_exec_bad_domains.c
new file mode 100644
index 00000000..cd2c8956
--- /dev/null
+++ b/tests/i915/gem_exec_bad_domains.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+/* Testcase: Test whether the kernel rejects relocations with non-gpu domains
+ *
+ * If it does not, it will oops at some later point, because the kernel does not expect such domains.
+ */
+
+IGT_TEST_DESCRIPTION("Test whether the kernel rejects relocations with non-gpu"
+ " domains.");
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+#define BAD_GTT_DEST ((512*1024*1024)) /* past end of aperture */
+
+static int
+run_batch(void)
+{
+ unsigned int used = batch->ptr - batch->buffer;
+ int ret;
+
+ if (used == 0)
+ return 0;
+
+ /* Round batchbuffer usage to 2 DWORDs. */
+ if ((used & 4) == 0) {
+ *(uint32_t *) (batch->ptr) = 0; /* noop */
+ batch->ptr += 4;
+ }
+
+ /* Mark the end of the buffer. */
+ *(uint32_t *)(batch->ptr) = MI_BATCH_BUFFER_END; /* noop */
+ batch->ptr += 4;
+ used = batch->ptr - batch->buffer;
+
+ ret = drm_intel_bo_subdata(batch->bo, 0, used, batch->buffer);
+ igt_assert_eq(ret, 0);
+
+ batch->ptr = NULL;
+
+ ret = drm_intel_bo_mrb_exec(batch->bo, used, NULL, 0, 0, 0);
+
+ intel_batchbuffer_reset(batch);
+
+ return ret;
+}
+
+#define I915_GEM_GPU_DOMAINS \
+ (I915_GEM_DOMAIN_RENDER | \
+ I915_GEM_DOMAIN_SAMPLER | \
+ I915_GEM_DOMAIN_COMMAND | \
+ I915_GEM_DOMAIN_INSTRUCTION | \
+ I915_GEM_DOMAIN_VERTEX)
+
+static void multi_write_domain(int fd)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 exec[2];
+ struct drm_i915_gem_relocation_entry reloc[1];
+ uint32_t handle, handle_target;
+
+ handle = gem_create(fd, 4096);
+ handle_target = gem_create(fd, 4096);
+
+ exec[0].handle = handle_target;
+ exec[0].relocation_count = 0;
+ exec[0].relocs_ptr = 0;
+ exec[0].alignment = 0;
+ exec[0].offset = 0;
+ exec[0].flags = 0;
+ exec[0].rsvd1 = 0;
+ exec[0].rsvd2 = 0;
+
+ exec[1].handle = handle;
+ exec[1].relocation_count = 1;
+ exec[1].relocs_ptr = to_user_pointer(reloc);
+ exec[1].alignment = 0;
+ exec[1].offset = 0;
+ exec[1].flags = 0;
+ exec[1].rsvd1 = 0;
+ exec[1].rsvd2 = 0;
+
+ reloc[0].offset = 4;
+ reloc[0].delta = 0;
+ reloc[0].target_handle = handle_target;
+ reloc[0].read_domains = I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[0].write_domain = I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[0].presumed_offset = 0;
+
+ execbuf.buffers_ptr = to_user_pointer(exec);
+ execbuf.buffer_count = 2;
+ execbuf.batch_start_offset = 0;
+ execbuf.batch_len = 8;
+ execbuf.cliprects_ptr = 0;
+ execbuf.num_cliprects = 0;
+ execbuf.DR1 = 0;
+ execbuf.DR4 = 0;
+ execbuf.flags = 0;
+ i915_execbuffer2_set_context_id(execbuf, 0);
+ execbuf.rsvd2 = 0;
+
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+
+ gem_close(fd, handle);
+ gem_close(fd, handle_target);
+}
+
+int fd;
+drm_intel_bo *tmp;
+
+igt_main
+{
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+ tmp = drm_intel_bo_alloc(bufmgr, "tmp", 128 * 128, 4096);
+ }
+
+ igt_subtest("cpu-domain") {
+ BEGIN_BATCH(2, 1);
+ OUT_BATCH(0);
+ OUT_RELOC(tmp, I915_GEM_DOMAIN_CPU, 0, 0);
+ ADVANCE_BATCH();
+ igt_assert(run_batch() == -EINVAL);
+
+ BEGIN_BATCH(2, 1);
+ OUT_BATCH(0);
+ OUT_RELOC(tmp, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU, 0);
+ ADVANCE_BATCH();
+ igt_assert(run_batch() == -EINVAL);
+ }
+
+ igt_subtest("gtt-domain") {
+ BEGIN_BATCH(2, 1);
+ OUT_BATCH(0);
+ OUT_RELOC(tmp, I915_GEM_DOMAIN_GTT, 0, 0);
+ ADVANCE_BATCH();
+ igt_assert(run_batch() == -EINVAL);
+
+ BEGIN_BATCH(2, 1);
+ OUT_BATCH(0);
+ OUT_RELOC(tmp, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT, 0);
+ ADVANCE_BATCH();
+ igt_assert(run_batch() == -EINVAL);
+ }
+
+	/* Note: Older kernels disallow this. Punt on the skip check, though,
+	 * since those kernels are too old to matter. */
+ igt_subtest("conflicting-write-domain") {
+ BEGIN_BATCH(4, 2);
+ OUT_BATCH(0);
+ OUT_RELOC(tmp, I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(0);
+ OUT_RELOC(tmp, I915_GEM_DOMAIN_INSTRUCTION,
+ I915_GEM_DOMAIN_INSTRUCTION, 0);
+ ADVANCE_BATCH();
+ igt_assert(run_batch() == 0);
+ }
+
+ igt_subtest("double-write-domain")
+ multi_write_domain(fd);
+
+ igt_subtest("invalid-gpu-domain") {
+ BEGIN_BATCH(2, 1);
+ OUT_BATCH(0);
+ OUT_RELOC(tmp, ~(I915_GEM_GPU_DOMAINS | I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU),
+ 0, 0);
+ ADVANCE_BATCH();
+ igt_assert(run_batch() == -EINVAL);
+
+ BEGIN_BATCH(2, 1);
+ OUT_BATCH(0);
+ OUT_RELOC(tmp, I915_GEM_DOMAIN_GTT << 1,
+ I915_GEM_DOMAIN_GTT << 1, 0);
+ ADVANCE_BATCH();
+ igt_assert(run_batch() == -EINVAL);
+ }
+
+ igt_fixture {
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_exec_basic.c b/tests/i915/gem_exec_basic.c
new file mode 100644
index 00000000..dcb83864
--- /dev/null
+++ b/tests/i915/gem_exec_basic.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+
+IGT_TEST_DESCRIPTION("Basic sanity check of execbuf-ioctl rings.");
+
+static uint32_t batch_create(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ uint32_t handle;
+
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, &bbe, sizeof(bbe));
+
+ return handle;
+}
+
+static void batch_fini(int fd, uint32_t handle)
+{
+ gem_sync(fd, handle); /* catch any GPU hang */
+ gem_close(fd, handle);
+}
+
+static void noop(int fd, unsigned ring)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 exec;
+
+ gem_require_ring(fd, ring);
+
+ memset(&exec, 0, sizeof(exec));
+
+ exec.handle = batch_create(fd);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&exec);
+ execbuf.buffer_count = 1;
+ execbuf.flags = ring;
+ gem_execbuf(fd, &execbuf);
+
+ batch_fini(fd, exec.handle);
+}
+
+static void readonly(int fd, unsigned ring)
+{
+ struct drm_i915_gem_execbuffer2 *execbuf;
+ struct drm_i915_gem_exec_object2 exec;
+
+ gem_require_ring(fd, ring);
+
+ memset(&exec, 0, sizeof(exec));
+ exec.handle = batch_create(fd);
+
+ execbuf = mmap(NULL, 4096, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+	igt_assert(execbuf != MAP_FAILED);
+
+ execbuf->buffers_ptr = to_user_pointer(&exec);
+ execbuf->buffer_count = 1;
+ execbuf->flags = ring;
+ igt_assert(mprotect(execbuf, 4096, PROT_READ) == 0);
+
+ gem_execbuf(fd, execbuf);
+
+ munmap(execbuf, 4096);
+
+ batch_fini(fd, exec.handle);
+}
+
+static void gtt(int fd, unsigned ring)
+{
+ struct drm_i915_gem_execbuffer2 *execbuf;
+ struct drm_i915_gem_exec_object2 *exec;
+ uint32_t handle;
+
+ gem_require_ring(fd, ring);
+
+ handle = gem_create(fd, 4096);
+
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ execbuf = gem_mmap__gtt(fd, handle, 4096, PROT_WRITE);
+ exec = (struct drm_i915_gem_exec_object2 *)(execbuf + 1);
+ gem_close(fd, handle);
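+	/* the execbuf and exec structs live inside a GTT mmap of this bo; the mapping keeps the object alive after the handle is closed */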
+
+ exec->handle = batch_create(fd);
+
+ execbuf->buffers_ptr = to_user_pointer(exec);
+ execbuf->buffer_count = 1;
+ execbuf->flags = ring;
+
+ gem_execbuf(fd, execbuf);
+
+ batch_fini(fd, exec->handle);
+ munmap(execbuf, 4096);
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ int fd = -1;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ igt_fork_hang_detector(fd);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_f("basic-%s", e->name)
+ noop(fd, e->exec_id | e->flags);
+ igt_subtest_f("readonly-%s", e->name)
+ readonly(fd, e->exec_id | e->flags);
+ igt_subtest_f("gtt-%s", e->name)
+ gtt(fd, e->exec_id | e->flags);
+ }
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_exec_big.c b/tests/i915/gem_exec_big.c
new file mode 100644
index 00000000..a15672f6
--- /dev/null
+++ b/tests/i915/gem_exec_big.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright © 2011,2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/*
+ * Testcase: run a nop batch which is really big
+ *
+ * Mostly useful to stress-test the error-capture code
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION("Run a large nop batch to stress test the error capture"
+ " code.");
+
+#define FORCE_PREAD_PWRITE 0
+
+static int use_64bit_relocs;
+
+static void exec1(int fd, uint32_t handle, uint64_t reloc_ofs, unsigned flags, char *ptr)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 gem_exec[1];
+ struct drm_i915_gem_relocation_entry gem_reloc[1];
+
+ gem_reloc[0].offset = reloc_ofs;
+ gem_reloc[0].delta = 0;
+ gem_reloc[0].target_handle = handle;
+ gem_reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+ gem_reloc[0].write_domain = 0;
+ gem_reloc[0].presumed_offset = 0;
+
+ gem_exec[0].handle = handle;
+ gem_exec[0].relocation_count = 1;
+ gem_exec[0].relocs_ptr = to_user_pointer(gem_reloc);
+ gem_exec[0].alignment = 0;
+ gem_exec[0].offset = 0;
+ gem_exec[0].flags = 0;
+ gem_exec[0].rsvd1 = 0;
+ gem_exec[0].rsvd2 = 0;
+
+ execbuf.buffers_ptr = to_user_pointer(gem_exec);
+ execbuf.buffer_count = 1;
+ execbuf.batch_start_offset = 0;
+ execbuf.batch_len = 8;
+ execbuf.cliprects_ptr = 0;
+ execbuf.num_cliprects = 0;
+ execbuf.DR1 = 0;
+ execbuf.DR4 = 0;
+ execbuf.flags = flags;
+ i915_execbuffer2_set_context_id(execbuf, 0);
+ execbuf.rsvd2 = 0;
+
+ /* Avoid hitting slowpaths in the reloc processing which might yield a
+ * presumed_offset of -1. Happens when the batch is still busy from the
+ * last round. */
+ gem_sync(fd, handle);
+
+ gem_execbuf(fd, &execbuf);
+
+ igt_warn_on(gem_reloc[0].presumed_offset == -1);
+
+ if (use_64bit_relocs) {
+ uint64_t tmp;
+ if (ptr)
+ tmp = *(uint64_t *)(ptr+reloc_ofs);
+ else
+ gem_read(fd, handle, reloc_ofs, &tmp, sizeof(tmp));
+ igt_assert_eq(tmp, gem_reloc[0].presumed_offset);
+ } else {
+ uint32_t tmp;
+ if (ptr)
+ tmp = *(uint32_t *)(ptr+reloc_ofs);
+ else
+ gem_read(fd, handle, reloc_ofs, &tmp, sizeof(tmp));
+ igt_assert_eq(tmp, gem_reloc[0].presumed_offset);
+ }
+}
+
+static void xchg_reloc(void *array, unsigned i, unsigned j)
+{
+ struct drm_i915_gem_relocation_entry *reloc = array;
+ struct drm_i915_gem_relocation_entry *a = &reloc[i];
+ struct drm_i915_gem_relocation_entry *b = &reloc[j];
+ struct drm_i915_gem_relocation_entry tmp;
+
+ tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+
+static void execN(int fd, uint32_t handle, uint64_t batch_size, unsigned flags, char *ptr)
+{
+#define reloc_ofs(N, T) ((((N)+1) << 12) - 4*(1 + ((N) == ((T)-1))))
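+/* one relocation per page, written at the page's tail; the final one backs off an extra dword so a 64-bit fixup stays inside the object */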
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 gem_exec[1];
+ struct drm_i915_gem_relocation_entry *gem_reloc;
+ uint64_t n, nreloc = batch_size >> 12;
+
+ gem_reloc = calloc(nreloc, sizeof(*gem_reloc));
+ igt_assert(gem_reloc);
+
+ for (n = 0; n < nreloc; n++) {
+ gem_reloc[n].offset = reloc_ofs(n, nreloc);
+ gem_reloc[n].target_handle = handle;
+ gem_reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
+ gem_reloc[n].presumed_offset = n ^ 0xbeefdeaddeadbeef;
+ if (ptr) {
+ if (use_64bit_relocs)
+ *(uint64_t *)(ptr + gem_reloc[n].offset) = gem_reloc[n].presumed_offset;
+ else
+ *(uint32_t *)(ptr + gem_reloc[n].offset) = gem_reloc[n].presumed_offset;
+ } else
+ gem_write(fd, handle, gem_reloc[n].offset, &gem_reloc[n].presumed_offset, 4*(1+use_64bit_relocs));
+ }
+
+ memset(gem_exec, 0, sizeof(gem_exec));
+ gem_exec[0].handle = handle;
+ gem_exec[0].relocation_count = nreloc;
+ gem_exec[0].relocs_ptr = to_user_pointer(gem_reloc);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(gem_exec);
+ execbuf.buffer_count = 1;
+ execbuf.batch_start_offset = 0;
+ execbuf.batch_len = 8;
+ execbuf.flags = flags;
+
+ /* Avoid hitting slowpaths in the reloc processing which might yield a
+ * presumed_offset of -1. Happens when the batch is still busy from the
+ * last round. */
+ gem_sync(fd, handle);
+
+ igt_permute_array(gem_reloc, nreloc, xchg_reloc);
+
+ gem_execbuf(fd, &execbuf);
+ for (n = 0; n < nreloc; n++)
+ igt_warn_on(gem_reloc[n].presumed_offset == -1);
+
+ if (use_64bit_relocs) {
+ for (n = 0; n < nreloc; n++) {
+ uint64_t tmp;
+ if (ptr)
+ tmp = *(uint64_t *)(ptr+reloc_ofs(n, nreloc));
+ else
+ gem_read(fd, handle, reloc_ofs(n, nreloc), &tmp, sizeof(tmp));
+ igt_assert_eq(tmp, gem_reloc[n].presumed_offset);
+ }
+ } else {
+ for (n = 0; n < nreloc; n++) {
+ uint32_t tmp;
+ if (ptr)
+ tmp = *(uint32_t *)(ptr+reloc_ofs(n, nreloc));
+ else
+ gem_read(fd, handle, reloc_ofs(n, nreloc), &tmp, sizeof(tmp));
+ igt_assert_eq(tmp, gem_reloc[n].presumed_offset);
+ }
+ }
+
+ free(gem_reloc);
+#undef reloc_ofs
+}
+
+igt_simple_main
+{
+ uint32_t batch[2] = {MI_BATCH_BUFFER_END};
+ uint64_t batch_size, max, ggtt_max, reloc_ofs;
+ int fd;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ use_64bit_relocs = intel_gen(intel_get_drm_devid(fd)) >= 8;
+
+ max = 3 * gem_aperture_size(fd) / 4;
+ ggtt_max = 3 * gem_global_aperture_size(fd) / 4;
+ intel_require_memory(1, max, CHECK_RAM);
+
+ for (batch_size = 4096; batch_size <= max; ) {
+ uint32_t handle;
+ void *ptr;
+
+ handle = gem_create(fd, batch_size);
+ gem_write(fd, handle, 0, batch, sizeof(batch));
+
+ if (!FORCE_PREAD_PWRITE && gem_has_llc(fd))
+ ptr = __gem_mmap__cpu(fd, handle, 0, batch_size, PROT_READ);
+ else if (!FORCE_PREAD_PWRITE && gem_mmap__has_wc(fd))
+ ptr = __gem_mmap__wc(fd, handle, 0, batch_size, PROT_READ);
+ else
+ ptr = NULL;
+
+ igt_debug("Forwards (%lld)\n", (long long)batch_size);
+ for (reloc_ofs = 4096; reloc_ofs < batch_size; reloc_ofs += 4096) {
+ igt_debug("batch_size %llu, reloc_ofs %llu\n",
+ (long long)batch_size, (long long)reloc_ofs);
+ exec1(fd, handle, reloc_ofs, 0, ptr);
+ if (batch_size < ggtt_max)
+ exec1(fd, handle, reloc_ofs, I915_EXEC_SECURE, ptr);
+ }
+
+ igt_debug("Backwards (%lld)\n", (long long)batch_size);
+ for (reloc_ofs = batch_size - 4096; reloc_ofs; reloc_ofs -= 4096) {
+ igt_debug("batch_size %llu, reloc_ofs %llu\n",
+ (long long)batch_size, (long long)reloc_ofs);
+ exec1(fd, handle, reloc_ofs, 0, ptr);
+ if (batch_size < ggtt_max)
+ exec1(fd, handle, reloc_ofs, I915_EXEC_SECURE, ptr);
+ }
+
+ igt_debug("Random (%lld)\n", (long long)batch_size);
+ execN(fd, handle, batch_size, 0, ptr);
+ if (batch_size < ggtt_max)
+ execN(fd, handle, batch_size, I915_EXEC_SECURE, ptr);
+
+ if (ptr)
+ munmap(ptr, batch_size);
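+		/* mark the batch purgeable instead of closing it, so the kernel may reclaim its pages under memory pressure */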
+ gem_madvise(fd, handle, I915_MADV_DONTNEED);
+
+ if (batch_size < max && 2*batch_size > max)
+ batch_size = max;
+ else
+ batch_size *= 2;
+ }
+
+ close(fd);
+}
diff --git a/tests/i915/gem_exec_blt.c b/tests/i915/gem_exec_blt.c
new file mode 100644
index 00000000..8d61dc87
--- /dev/null
+++ b/tests/i915/gem_exec_blt.c
@@ -0,0 +1,396 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include "drm.h"
+
+#define OBJECT_SIZE 16384
+
+#define COPY_BLT_CMD (2<<29|0x53<<22|0x6)
+#define BLT_WRITE_ALPHA (1<<21)
+#define BLT_WRITE_RGB (1<<20)
+#define BLT_SRC_TILED (1<<15)
+#define BLT_DST_TILED (1<<11)
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+static int gem_linear_blt(int fd,
+ uint32_t *batch,
+ uint32_t src,
+ uint32_t dst,
+ uint32_t length,
+ struct drm_i915_gem_relocation_entry *reloc)
+{
+ uint32_t *b = batch;
+ int height = length / (16 * 1024);
+
+ igt_assert_lte(height, 1 << 16);
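+	/* copy using a 16 KiB pitch: blit the whole rows first, then any tail as a single partial row */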
+
+ if (height) {
+ int i = 0;
+ b[i++] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ b[i-1]+=2;
+ b[i++] = 0xcc << 16 | 1 << 25 | 1 << 24 | (16*1024);
+ b[i++] = 0;
+ b[i++] = height << 16 | (4*1024);
+ b[i++] = 0;
+ reloc->offset = (b-batch+4) * sizeof(uint32_t);
+ reloc->delta = 0;
+ reloc->target_handle = dst;
+ reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc->write_domain = I915_GEM_DOMAIN_RENDER;
+ reloc->presumed_offset = 0;
+ reloc++;
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ b[i++] = 0; /* FIXME */
+
+ b[i++] = 0;
+ b[i++] = 16*1024;
+ b[i++] = 0;
+ reloc->offset = (b-batch+7) * sizeof(uint32_t);
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ reloc->offset += sizeof(uint32_t);
+ reloc->delta = 0;
+ reloc->target_handle = src;
+ reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc->write_domain = 0;
+ reloc->presumed_offset = 0;
+ reloc++;
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ b[i++] = 0; /* FIXME */
+
+ b += i;
+ length -= height * 16*1024;
+ }
+
+ if (length) {
+ int i = 0;
+ b[i++] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ b[i-1]+=2;
+ b[i++] = 0xcc << 16 | 1 << 25 | 1 << 24 | (16*1024);
+ b[i++] = height << 16;
+ b[i++] = (1+height) << 16 | (length / 4);
+ b[i++] = 0;
+ reloc->offset = (b-batch+4) * sizeof(uint32_t);
+ reloc->delta = 0;
+ reloc->target_handle = dst;
+ reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc->write_domain = I915_GEM_DOMAIN_RENDER;
+ reloc->presumed_offset = 0;
+ reloc++;
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ b[i++] = 0; /* FIXME */
+
+ b[i++] = height << 16;
+ b[i++] = 16*1024;
+ b[i++] = 0;
+ reloc->offset = (b-batch+7) * sizeof(uint32_t);
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ reloc->offset += sizeof(uint32_t);
+ reloc->delta = 0;
+ reloc->target_handle = src;
+ reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc->write_domain = 0;
+ reloc->presumed_offset = 0;
+ reloc++;
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ b[i++] = 0; /* FIXME */
+
+ b += i;
+ }
+
+ b[0] = MI_BATCH_BUFFER_END;
+ b[1] = 0;
+
+ return (b+2 - batch) * sizeof(uint32_t);
+}
+
+static double elapsed(const struct timeval *start,
+ const struct timeval *end,
+ int loop)
+{
+ return (1e6*(end->tv_sec - start->tv_sec) + (end->tv_usec - start->tv_usec))/loop;
+}
+
+static const char *bytes_per_sec(char *buf, double v)
+{
+ const char *order[] = {
+ "",
+ "KiB",
+ "MiB",
+ "GiB",
+ "TiB",
+ "PiB",
+ NULL,
+ }, **o = order;
+
+ while (v > 1024 && o[1]) {
+ v /= 1024;
+ o++;
+ }
+ sprintf(buf, "%.1f%s/s", v, *o);
+ return buf;
+}
+
+static int dcmp(const void *A, const void *B)
+{
+ const double *a = A, *b = B;
+ if (*a < *b)
+ return -1;
+ else if (*a > *b)
+ return 1;
+ else
+ return 0;
+}
+
+static void run(int object_size, bool dumb)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 exec[3];
+ struct drm_i915_gem_relocation_entry reloc[4];
+ uint32_t buf[20];
+ uint32_t handle, src, dst;
+ int fd, len, count;
+ int ring;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ if (dumb)
+ handle = kmstest_dumb_create(fd, 32, 32, 32, NULL, NULL);
+ else
+ handle = gem_create(fd, 4096);
+
+ src = gem_create(fd, object_size);
+ dst = gem_create(fd, object_size);
+
+ len = gem_linear_blt(fd, buf, 0, 1, object_size, reloc);
+ gem_write(fd, handle, 0, buf, len);
+
+ memset(exec, 0, sizeof(exec));
+ exec[0].handle = src;
+ exec[1].handle = dst;
+
+ exec[2].handle = handle;
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ exec[2].relocation_count = len > 56 ? 4 : 2;
+ else
+ exec[2].relocation_count = len > 40 ? 4 : 2;
+ exec[2].relocs_ptr = to_user_pointer(reloc);
+
+ ring = 0;
+ if (HAS_BLT_RING(intel_get_drm_devid(fd)))
+ ring = I915_EXEC_BLT;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(exec);
+ execbuf.buffer_count = 3;
+ execbuf.batch_len = len;
+ execbuf.flags = ring;
+ execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+
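+	/* older kernels lack HANDLE_LUT/NO_RELOC: rebuild the batch with real handles and retry without those flags */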
+ if (__gem_execbuf(fd, &execbuf)) {
+ len = gem_linear_blt(fd, buf, src, dst, object_size, reloc);
+ igt_assert(len == execbuf.batch_len);
+ gem_write(fd, handle, 0, buf, len);
+ execbuf.flags = ring;
+ gem_execbuf(fd, &execbuf);
+ }
+ gem_sync(fd, handle);
+
+ for (count = 1; count <= 1<<12; count <<= 1) {
+ struct timeval start, end;
+ const int reps = 9;
+ double t[reps], sum;
+ int n;
+
+ for (n = 0; n < reps; n++) {
+ gettimeofday(&start, NULL);
+ for (int loop = 0; loop < count; loop++)
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, handle);
+ gettimeofday(&end, NULL);
+ t[n] = elapsed(&start, &end, count);
+ }
+ qsort(t, n, sizeof(double), dcmp);
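+		/* trimmed mean: discard the two fastest and two slowest of the nine samples */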
+ sum = 0;
+ for (n = 2; n < reps - 2; n++)
+ sum += t[n];
+ sum /= reps - 4;
+ igt_info("Time to blt %d bytes x %6d: %7.3fµs, %s\n",
+ object_size, count, sum,
+ bytes_per_sec((char *)buf, object_size/sum*1e6));
+ fflush(stdout);
+ }
+ gem_close(fd, handle);
+
+ close(fd);
+}
+
+static int sysfs_read(const char *name)
+{
+ char buf[4096];
+ int sysfd;
+ int len;
+
+ sprintf(buf, "/sys/class/drm/card%d/%s",
+ drm_get_card(), name);
+ sysfd = open(buf, O_RDONLY);
+ if (sysfd < 0)
+ return -1;
+
+ len = read(sysfd, buf, sizeof(buf)-1);
+ close(sysfd);
+ if (len < 0)
+ return -1;
+
+ buf[len] = '\0';
+ return atoi(buf);
+}
+
+static int sysfs_write(const char *name, int value)
+{
+ char buf[4096];
+ int sysfd;
+ int len;
+
+ sprintf(buf, "/sys/class/drm/card%d/%s",
+ drm_get_card(), name);
+ sysfd = open(buf, O_WRONLY);
+ if (sysfd < 0)
+ return -1;
+
+ len = sprintf(buf, "%d", value);
+ len = write(sysfd, buf, len);
+ close(sysfd);
+
+ if (len < 0)
+ return len;
+
+ return 0;
+}
+
+static void set_auto_freq(void)
+{
+ int min = sysfs_read("gt_RPn_freq_mhz");
+ int max = sysfs_read("gt_RP0_freq_mhz");
+ if (max <= min)
+ return;
+
+ igt_debug("Setting min to %dMHz, and max to %dMHz\n", min, max);
+ sysfs_write("gt_min_freq_mhz", min);
+ sysfs_write("gt_max_freq_mhz", max);
+}
+
+static void set_min_freq(void)
+{
+ int min = sysfs_read("gt_RPn_freq_mhz");
+ igt_require(min > 0);
+ igt_debug("Setting min/max to %dMHz\n", min);
+ igt_require(sysfs_write("gt_min_freq_mhz", min) == 0 &&
+ sysfs_write("gt_max_freq_mhz", min) == 0);
+}
+
+static void set_max_freq(void)
+{
+ int max = sysfs_read("gt_RP0_freq_mhz");
+ igt_require(max > 0);
+ igt_debug("Setting min/max to %dMHz\n", max);
+ igt_require(sysfs_write("gt_max_freq_mhz", max) == 0 &&
+ sysfs_write("gt_min_freq_mhz", max) == 0);
+}
+
+
+int main(int argc, char **argv)
+{
+ const struct {
+ const char *suffix;
+ void (*func)(void);
+ } rps[] = {
+ { "", set_auto_freq },
+ { "-min", set_min_freq },
+ { "-max", set_max_freq },
+ { NULL, NULL },
+ }, *r;
+ int min = -1, max = -1;
+ int i;
+
+ igt_subtest_init(argc, argv);
+
+ igt_skip_on_simulation();
+
+ if (argc > 1) {
+ for (i = 1; i < argc; i++) {
+ int object_size = atoi(argv[i]);
+ if (object_size)
+ run((object_size + 3) & -4, false);
+ }
+ _exit(0); /* blergh */
+ }
+
+ igt_fixture {
+ min = sysfs_read("gt_min_freq_mhz");
+ max = sysfs_read("gt_max_freq_mhz");
+ }
+
+ for (r = rps; r->suffix; r++) {
+ igt_fixture r->func();
+
+ igt_subtest_f("cold%s", r->suffix)
+ run(OBJECT_SIZE, false);
+
+ igt_subtest_f("normal%s", r->suffix)
+ run(OBJECT_SIZE, false);
+
+ igt_subtest_f("dumb-buf%s", r->suffix)
+ run(OBJECT_SIZE, true);
+ }
+
+ igt_fixture {
+ if (min > 0)
+ sysfs_write("gt_min_freq_mhz", min);
+ if (max > 0)
+ sysfs_write("gt_max_freq_mhz", max);
+ }
+
+ igt_exit();
+}
diff --git a/tests/i915/gem_exec_capture.c b/tests/i915/gem_exec_capture.c
new file mode 100644
index 00000000..3e4a4377
--- /dev/null
+++ b/tests/i915/gem_exec_capture.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+#include "igt_device.h"
+#include "igt_sysfs.h"
+
+#define LOCAL_OBJECT_CAPTURE (1 << 7)
+#define LOCAL_PARAM_HAS_EXEC_CAPTURE 45
+
+IGT_TEST_DESCRIPTION("Check that we capture the user specified objects on a hang");
+
+static void check_error_state(int dir, struct drm_i915_gem_exec_object2 *obj)
+{
+ char *error, *str;
+ bool found = false;
+
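+	/* read the error state, then write to the sysfs file to clear it for the next test */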
+ error = igt_sysfs_get(dir, "error");
+ igt_sysfs_set(dir, "error", "Begone!");
+
+ igt_assert(error);
+ igt_debug("%s\n", error);
+
+ /* render ring --- user = 0x00000000 ffffd000 */
+ for (str = error; (str = strstr(str, "--- user = ")); str++) {
+ uint64_t addr;
+ uint32_t hi, lo;
+
+ igt_assert(sscanf(str, "--- user = 0x%x %x", &hi, &lo) == 2);
+ addr = hi;
+ addr <<= 32;
+ addr |= lo;
+ igt_assert_eq_u64(addr, obj->offset);
+ found = true;
+ }
+
+ igt_assert(found);
+}
+
+static void __capture(int fd, int dir, unsigned ring, uint32_t target)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 obj[4];
+#define SCRATCH 0
+#define CAPTURE 1
+#define NOCAPTURE 2
+#define BATCH 3
+ struct drm_i915_gem_relocation_entry reloc[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t *batch, *seqno;
+ int i;
+
+ memset(obj, 0, sizeof(obj));
+ obj[SCRATCH].handle = gem_create(fd, 4096);
+ obj[CAPTURE].handle = target;
+ obj[CAPTURE].flags = LOCAL_OBJECT_CAPTURE;
+ obj[NOCAPTURE].handle = gem_create(fd, 4096);
+
+ obj[BATCH].handle = gem_create(fd, 4096);
+ obj[BATCH].relocs_ptr = (uintptr_t)reloc;
+ obj[BATCH].relocation_count = ARRAY_SIZE(reloc);
+
+ memset(reloc, 0, sizeof(reloc));
+ reloc[0].target_handle = obj[BATCH].handle; /* recurse */
+ reloc[0].presumed_offset = 0;
+ reloc[0].offset = 5*sizeof(uint32_t);
+ reloc[0].delta = 0;
+ reloc[0].read_domains = I915_GEM_DOMAIN_COMMAND;
+ reloc[0].write_domain = 0;
+
+ reloc[1].target_handle = obj[SCRATCH].handle; /* breadcrumb */
+ reloc[1].presumed_offset = 0;
+ reloc[1].offset = sizeof(uint32_t);
+ reloc[1].delta = 0;
+ reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[1].write_domain = I915_GEM_DOMAIN_RENDER;
+
+ seqno = gem_mmap__wc(fd, obj[SCRATCH].handle, 0, 4096, PROT_READ);
+ gem_set_domain(fd, obj[SCRATCH].handle,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+ batch = gem_mmap__cpu(fd, obj[BATCH].handle, 0, 4096, PROT_WRITE);
+ gem_set_domain(fd, obj[BATCH].handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+
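+	/*
+	 * MI_STORE_DWORD_IMM encoding varies by gen: gen8+ takes a 64-bit
+	 * address (two dwords), gen4-7 a padded 32-bit address (so the
+	 * breadcrumb reloc moves one dword), and gen2/3 a shorter form
+	 * whose length field must be reduced (batch[i]--).
+	 */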
+ i = 0;
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = 0;
+ reloc[1].offset += sizeof(uint32_t);
+ } else {
+ batch[i]--;
+ batch[++i] = 0;
+ }
+ batch[++i] = 0xc0ffee;
+ if (gen < 4)
+ batch[++i] = MI_NOOP;
+
+ batch[++i] = MI_BATCH_BUFFER_START; /* not crashed? try again! */
+ if (gen >= 8) {
+ batch[i] |= 1 << 8 | 1;
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else if (gen >= 6) {
+ batch[i] |= 1 << 8;
+ batch[++i] = 0;
+ } else {
+ batch[i] |= 2 << 6;
+ batch[++i] = 0;
+ if (gen < 4) {
+ batch[i] |= 1;
+ reloc[0].delta = 1;
+ }
+ }
+ munmap(batch, 4096);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = (uintptr_t)obj;
+ execbuf.buffer_count = ARRAY_SIZE(obj);
+ execbuf.flags = ring;
+ if (gen > 3 && gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ igt_assert(!READ_ONCE(*seqno));
+ gem_execbuf(fd, &execbuf);
+
+ /* Wait for the request to start */
+ while (READ_ONCE(*seqno) != 0xc0ffee)
+ igt_assert(gem_bo_busy(fd, obj[SCRATCH].handle));
+ munmap(seqno, 4096);
+
+ /* Check that only the buffer we marked is reported in the error */
+ igt_force_gpu_reset(fd);
+ check_error_state(dir, &obj[CAPTURE]);
+
+ gem_sync(fd, obj[BATCH].handle);
+
+ gem_close(fd, obj[BATCH].handle);
+ gem_close(fd, obj[NOCAPTURE].handle);
+ gem_close(fd, obj[SCRATCH].handle);
+}
+
+static void capture(int fd, int dir, unsigned ring)
+{
+ uint32_t handle;
+
+ handle = gem_create(fd, 4096);
+ __capture(fd, dir, ring, handle);
+ gem_close(fd, handle);
+}
+
+static void userptr(int fd, int dir)
+{
+ uint32_t handle;
+ void *ptr;
+
+ igt_assert(posix_memalign(&ptr, 4096, 4096) == 0);
+ igt_require(__gem_userptr(fd, ptr, 4096, 0, 0, &handle) == 0);
+
+ __capture(fd, dir, 0, handle);
+
+ gem_close(fd, handle);
+ free(ptr);
+}
+
+static bool has_capture(int fd)
+{
+ drm_i915_getparam_t gp;
+ int async = -1;
+
+ gp.param = LOCAL_PARAM_HAS_EXEC_CAPTURE;
+ gp.value = &async;
+ drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+
+ return async > 0;
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ igt_hang_t hang;
+ int fd = -1;
+ int dir = -1;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ int gen;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ gen = intel_gen(intel_get_drm_devid(fd));
+ if (gen > 3 && gen < 6) /* ctg and ilk need secure batches */
+ igt_device_set_master(fd);
+
+ igt_require_gem(fd);
+ gem_require_mmap_wc(fd);
+ igt_require(has_capture(fd));
+		hang = igt_allow_hang(fd, 0, HANG_ALLOW_CAPTURE);
+
+ dir = igt_sysfs_open(fd, NULL);
+ igt_require(igt_sysfs_set(dir, "error", "Begone!"));
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ /* default exec-id is purely symbolic */
+ if (e->exec_id == 0)
+ continue;
+
+ igt_subtest_f("capture-%s", e->name) {
+ igt_require(gem_ring_has_physical_engine(fd, e->exec_id | e->flags));
+ igt_require(gem_can_store_dword(fd, e->exec_id | e->flags));
+ capture(fd, dir, e->exec_id | e->flags);
+ }
+ }
+
+ /* And check we can read from different types of objects */
+
+ igt_subtest_f("userptr") {
+ igt_require(gem_can_store_dword(fd, 0));
+ userptr(fd, dir);
+ }
+
+ igt_fixture {
+ close(dir);
+ igt_disallow_hang(fd, hang);
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_exec_create.c b/tests/i915/gem_exec_create.c
new file mode 100644
index 00000000..54a2429e
--- /dev/null
+++ b/tests/i915/gem_exec_create.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <time.h>
+#include "drm.h"
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define LOCAL_I915_EXEC_BSD_SHIFT (13)
+#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define ENGINE_FLAGS (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+static double elapsed(const struct timespec *start, const struct timespec *end)
+{
+ return ((end->tv_sec - start->tv_sec) +
+ (end->tv_nsec - start->tv_nsec)*1e-9);
+}
+
+#define LEAK 0x1
+
+static void all(int fd, unsigned flags, int timeout, int ncpus)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ unsigned engines[16];
+ unsigned nengine;
+ unsigned engine;
+
+ nengine = 0;
+ for_each_physical_engine(fd, engine)
+ engines[nengine++] = engine;
+ igt_require(nengine);
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
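+	/*
+	 * Probe the HANDLE_LUT | NO_RELOC fast path once; older kernels
+	 * reject these flags, in which case fall back to a plain execbuf.
+	 */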
+ if (__gem_execbuf(fd, &execbuf)) {
+ execbuf.flags = 0;
+ gem_execbuf(fd, &execbuf);
+ }
+ gem_sync(fd, obj.handle);
+ gem_close(fd, obj.handle);
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ igt_fork(child, ncpus) {
+ struct timespec start, now;
+ unsigned long count;
+ double time;
+
+ count = 0;
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ do {
+ for (int loop = 0; loop < 1024; loop++) {
+ for (int n = 0; n < nengine; n++) {
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+ execbuf.flags &= ~ENGINE_FLAGS;
+ execbuf.flags |= engines[n];
+ gem_execbuf(fd, &execbuf);
+ if (flags & LEAK)
+ gem_madvise(fd, obj.handle, I915_MADV_DONTNEED);
+ else
+ gem_close(fd, obj.handle);
+ }
+ }
+ count += nengine * 1024;
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ } while (elapsed(&start, &now) < timeout); /* Hang detection ~120s */
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+ for (int n = 0; n < nengine; n++) {
+ execbuf.flags &= ~ENGINE_FLAGS;
+ execbuf.flags |= engines[n];
+ gem_execbuf(fd, &execbuf);
+ }
+ gem_sync(fd, obj.handle);
+ gem_close(fd, obj.handle);
+ clock_gettime(CLOCK_MONOTONIC, &now);
+
+ time = elapsed(&start, &now) / count;
+ igt_info("[%d] All (%d engines): %'lu cycles, average %.3fus per cycle\n",
+ child, nengine, count, 1e6*time);
+ }
+ igt_waitchildren();
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+igt_main
+{
+ const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ int device = -1;
+
+ igt_fixture {
+ device = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(device);
+
+ igt_fork_hang_detector(device);
+ }
+
+ igt_subtest("basic")
+ all(device, 0, 5, 1);
+ igt_subtest("forked")
+ all(device, 0, 150, ncpus);
+
+ igt_subtest("madvise")
+ all(device, LEAK, 20, 1);
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ close(device);
+ }
+}
diff --git a/tests/i915/gem_exec_faulting_reloc.c b/tests/i915/gem_exec_faulting_reloc.c
new file mode 100644
index 00000000..6b05e43f
--- /dev/null
+++ b/tests/i915/gem_exec_faulting_reloc.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include "drm.h"
+
+/* Testcase: Submit batches whose relocation entries live in memory that will
+ * fault when accessed.
+ *
+ * To be really evil, use a gtt mmap for them.
+ */
+
+IGT_TEST_DESCRIPTION("Submit batches with relocation entries in memory that"
+		     " will fault.");
+
+#define OBJECT_SIZE 16384
+
+#define COPY_BLT_CMD_NOLEN (2<<29|0x53<<22)
+#define BLT_WRITE_ALPHA (1<<21)
+#define BLT_WRITE_RGB (1<<20)
+#define BLT_SRC_TILED (1<<15)
+#define BLT_DST_TILED (1<<11)
+
+uint32_t devid;
+
+static int gem_linear_blt(uint32_t *batch,
+ uint32_t src,
+ uint32_t dst,
+ uint32_t length,
+ struct drm_i915_gem_relocation_entry *reloc)
+{
+ uint32_t *b = batch;
+ int height = length / (16 * 1024);
+
+ igt_assert_lte(height, 1 << 16);
+
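+	/*
+	 * Emit the copy as rows of 16 KiB (4096 32bpp pixels per row);
+	 * any remainder is handled as a final partial row below.
+	 */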
+ if (height) {
+ int i = 0;
+ b[i++] = COPY_BLT_CMD_NOLEN | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ if (intel_gen(devid) >= 8)
+ b[i-1] |= 8;
+ else
+ b[i-1] |= 6;
+ b[i++] = 0xcc << 16 | 1 << 25 | 1 << 24 | (16*1024);
+ b[i++] = 0;
+ b[i++] = height << 16 | (4*1024);
+ b[i++] = 0;
+ reloc->offset = (b-batch+4) * sizeof(uint32_t);
+ reloc->delta = 0;
+ reloc->target_handle = dst;
+ reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc->write_domain = I915_GEM_DOMAIN_RENDER;
+ reloc->presumed_offset = 0;
+ reloc++;
+
+ if (intel_gen(devid) >= 8)
+ b[i++] = 0; /* FIXME: use real high dword */
+
+ b[i++] = 0;
+ b[i++] = 16*1024;
+ b[i++] = 0;
+ reloc->offset = (b-batch+7) * sizeof(uint32_t);
+ if (intel_gen(devid) >= 8) {
+ reloc->offset += sizeof(uint32_t);
+ b[i++] = 0; /* FIXME: use real high dword */
+ }
+ reloc->delta = 0;
+ reloc->target_handle = src;
+ reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc->write_domain = 0;
+ reloc->presumed_offset = 0;
+ reloc++;
+
+ if (intel_gen(devid) >= 8)
+ b += 10;
+ else
+ b += 8;
+ length -= height * 16*1024;
+ }
+
+ if (length) {
+ int i = 0;
+ b[i++] = COPY_BLT_CMD_NOLEN | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ if (intel_gen(devid) >= 8)
+ b[i-1] |= 8;
+ else
+ b[i-1] |= 6;
+ b[i++] = 0xcc << 16 | 1 << 25 | 1 << 24 | (16*1024);
+ b[i++] = height << 16;
+ b[i++] = (1+height) << 16 | (length / 4);
+ b[i++] = 0;
+ reloc->offset = (b-batch+4) * sizeof(uint32_t);
+ reloc->delta = 0;
+ reloc->target_handle = dst;
+ reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc->write_domain = I915_GEM_DOMAIN_RENDER;
+ reloc->presumed_offset = 0;
+ reloc++;
+ if (intel_gen(devid) >= 8)
+ b[i++] = 0; /* FIXME: use real high dword */
+
+ b[i++] = height << 16;
+ b[i++] = 16*1024;
+ b[i++] = 0;
+ reloc->offset = (b-batch+7) * sizeof(uint32_t);
+ if (intel_gen(devid) >= 8) {
+ reloc->offset += sizeof(uint32_t);
+ b[i++] = 0; /* FIXME: use real high dword */
+ }
+ reloc->delta = 0;
+ reloc->target_handle = src;
+ reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc->write_domain = 0;
+ reloc->presumed_offset = 0;
+ reloc++;
+
+ if (intel_gen(devid) >= 8)
+ b += 10;
+ else
+ b += 8;
+ }
+
+ b[0] = MI_BATCH_BUFFER_END;
+ b[1] = 0;
+
+ return (b+2 - batch) * sizeof(uint32_t);
+}
+
+static void run(int object_size)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 exec[3];
+ struct drm_i915_gem_relocation_entry reloc[4];
+ uint32_t buf[40];
+ uint32_t handle, handle_relocs, src, dst;
+ void *gtt_relocs;
+ int fd, len;
+ int ring;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ devid = intel_get_drm_devid(fd);
+ handle = gem_create(fd, 4096);
+ src = gem_create(fd, object_size);
+ dst = gem_create(fd, object_size);
+
+ len = gem_linear_blt(buf, src, dst, object_size, reloc);
+ gem_write(fd, handle, 0, buf, len);
+
+ exec[0].handle = src;
+ exec[0].relocation_count = 0;
+ exec[0].relocs_ptr = 0;
+ exec[0].alignment = 0;
+ exec[0].offset = 0;
+ exec[0].flags = 0;
+ exec[0].rsvd1 = 0;
+ exec[0].rsvd2 = 0;
+
+ exec[1].handle = dst;
+ exec[1].relocation_count = 0;
+ exec[1].relocs_ptr = 0;
+ exec[1].alignment = 0;
+ exec[1].offset = 0;
+ exec[1].flags = 0;
+ exec[1].rsvd1 = 0;
+ exec[1].rsvd2 = 0;
+
+ handle_relocs = gem_create(fd, 4096);
+ gem_write(fd, handle_relocs, 0, reloc, sizeof(reloc));
+ gtt_relocs = gem_mmap__gtt(fd, handle_relocs, 4096,
+ PROT_READ | PROT_WRITE);
+
+ exec[2].handle = handle;
+ if (intel_gen(devid) >= 8)
+ exec[2].relocation_count = len > 56 ? 4 : 2;
+ else
+ exec[2].relocation_count = len > 40 ? 4 : 2;
+	/* A newly mmapped gtt bo will fault on first access. */
+ exec[2].relocs_ptr = to_user_pointer(gtt_relocs);
+ exec[2].alignment = 0;
+ exec[2].offset = 0;
+ exec[2].flags = 0;
+ exec[2].rsvd1 = 0;
+ exec[2].rsvd2 = 0;
+
+ ring = 0;
+ if (HAS_BLT_RING(devid))
+ ring = I915_EXEC_BLT;
+
+ execbuf.buffers_ptr = to_user_pointer(exec);
+ execbuf.buffer_count = 3;
+ execbuf.batch_start_offset = 0;
+ execbuf.batch_len = len;
+ execbuf.cliprects_ptr = 0;
+ execbuf.num_cliprects = 0;
+ execbuf.DR1 = 0;
+ execbuf.DR4 = 0;
+ execbuf.flags = ring;
+ i915_execbuffer2_set_context_id(execbuf, 0);
+ execbuf.rsvd2 = 0;
+
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, handle);
+
+ gem_close(fd, handle);
+
+ close(fd);
+}
+
+igt_main
+{
+ igt_subtest("normal")
+ run(OBJECT_SIZE);
+ igt_subtest("no-prefault") {
+ igt_disable_prefault();
+ run(OBJECT_SIZE);
+ igt_enable_prefault();
+ }
+}
diff --git a/tests/i915/gem_exec_fence.c b/tests/i915/gem_exec_fence.c
new file mode 100644
index 00000000..ba46595d
--- /dev/null
+++ b/tests/i915/gem_exec_fence.c
@@ -0,0 +1,1731 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+#include "igt_sysfs.h"
+#include "igt_vgem.h"
+#include "sw_sync.h"
+#include "i915/gem_ring.h"
+
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/signal.h>
+
+IGT_TEST_DESCRIPTION("Check that execbuf waits for explicit fences");
+
+#define LOCAL_EXEC_FENCE_IN (1 << 16)
+#define LOCAL_EXEC_FENCE_OUT (1 << 17)
+#define LOCAL_EXEC_FENCE_SUBMIT (1 << 20)
+
+#define LOCAL_EXEC_FENCE_ARRAY (1 << 19)
+struct local_gem_exec_fence {
+ uint32_t handle;
+ uint32_t flags;
+#define LOCAL_EXEC_FENCE_WAIT (1 << 0)
+#define LOCAL_EXEC_FENCE_SIGNAL (1 << 1)
+};
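+/*
+ * With LOCAL_EXEC_FENCE_ARRAY set, execbuf2 reinterprets cliprects_ptr as a
+ * pointer to an array of these entries and num_cliprects as its length.
+ */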
+
+#ifndef SYNC_IOC_MERGE
+struct sync_merge_data {
+ char name[32];
+ int32_t fd2;
+ int32_t fence;
+ uint32_t flags;
+ uint32_t pad;
+};
+#define SYNC_IOC_MAGIC '>'
+#define SYNC_IOC_MERGE _IOWR(SYNC_IOC_MAGIC, 3, struct sync_merge_data)
+#endif
+
+static void store(int fd, unsigned ring, int fence, uint32_t target, unsigned offset_value)
+{
+ const int SCRATCH = 0;
+ const int BATCH = 1;
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t batch[16];
+ int i;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags = ring | LOCAL_EXEC_FENCE_IN;
+ execbuf.rsvd2 = fence;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ memset(obj, 0, sizeof(obj));
+ obj[SCRATCH].handle = target;
+
+ obj[BATCH].handle = gem_create(fd, 4096);
+ obj[BATCH].relocs_ptr = to_user_pointer(&reloc);
+ obj[BATCH].relocation_count = 1;
+ memset(&reloc, 0, sizeof(reloc));
+
+ i = 0;
+ reloc.target_handle = obj[SCRATCH].handle;
+ reloc.presumed_offset = -1;
+ reloc.offset = sizeof(uint32_t) * (i + 1);
+ reloc.delta = sizeof(uint32_t) * offset_value;
+ reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = reloc.delta;
+ batch[++i] = 0;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = reloc.delta;
+ reloc.offset += sizeof(uint32_t);
+ } else {
+ batch[i]--;
+ batch[++i] = reloc.delta;
+ }
+ batch[++i] = offset_value;
+ batch[++i] = MI_BATCH_BUFFER_END;
+ gem_write(fd, obj[BATCH].handle, 0, batch, sizeof(batch));
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, obj[BATCH].handle);
+}
+
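+/*
+ * A sync_file fd reports POLLIN once its fence signals, so a zero-timeout
+ * poll() that returns no events means the fence is still busy.
+ */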
+static bool fence_busy(int fence)
+{
+ return poll(&(struct pollfd){fence, POLLIN}, 1, 0) == 0;
+}
+
+#define HANG 0x1
+#define NONBLOCK 0x2
+#define WAIT 0x4
+
+static void test_fence_busy(int fd, unsigned ring, unsigned flags)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct timespec tv;
+ uint32_t *batch;
+ int fence, i, timeout;
+
+ gem_quiescent_gpu(fd);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = ring | LOCAL_EXEC_FENCE_OUT;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+
+ obj.relocs_ptr = to_user_pointer(&reloc);
+ obj.relocation_count = 1;
+ memset(&reloc, 0, sizeof(reloc));
+
+ batch = gem_mmap__wc(fd, obj.handle, 0, 4096, PROT_WRITE);
+ gem_set_domain(fd, obj.handle,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+ reloc.target_handle = obj.handle; /* recurse */
+ reloc.presumed_offset = 0;
+ reloc.offset = sizeof(uint32_t);
+ reloc.delta = 0;
+ reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
+ reloc.write_domain = 0;
+
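+	/*
+	 * Build a batch that loops back to itself with MI_BATCH_BUFFER_START,
+	 * so it spins on the GPU until we overwrite the first dword with
+	 * MI_BATCH_BUFFER_END (or, in the HANG case, let the reset kick in).
+	 */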
+ i = 0;
+ batch[i] = MI_BATCH_BUFFER_START;
+ if (gen >= 8) {
+ batch[i] |= 1 << 8 | 1;
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else if (gen >= 6) {
+ batch[i] |= 1 << 8;
+ batch[++i] = 0;
+ } else {
+ batch[i] |= 2 << 6;
+ batch[++i] = 0;
+ if (gen < 4) {
+ batch[i] |= 1;
+ reloc.delta = 1;
+ }
+ }
+ i++;
+
+ execbuf.rsvd2 = -1;
+ gem_execbuf_wr(fd, &execbuf);
+ fence = execbuf.rsvd2 >> 32;
+ igt_assert(fence != -1);
+
+ igt_assert(gem_bo_busy(fd, obj.handle));
+ igt_assert(fence_busy(fence));
+
+ timeout = 120;
+ if ((flags & HANG) == 0) {
+ *batch = MI_BATCH_BUFFER_END;
+ __sync_synchronize();
+ timeout = 1;
+ }
+ munmap(batch, 4096);
+
+ if (flags & WAIT) {
+ struct pollfd pfd = { .fd = fence, .events = POLLIN };
+ igt_assert(poll(&pfd, 1, timeout*1000) == 1);
+ } else {
+ memset(&tv, 0, sizeof(tv));
+ while (fence_busy(fence))
+ igt_assert(igt_seconds_elapsed(&tv) < timeout);
+ }
+
+ igt_assert(!gem_bo_busy(fd, obj.handle));
+ igt_assert_eq(sync_fence_status(fence),
+ flags & HANG ? -EIO : SYNC_FENCE_OK);
+
+ close(fence);
+ gem_close(fd, obj.handle);
+
+ gem_quiescent_gpu(fd);
+}
+
+static void test_fence_busy_all(int fd, unsigned flags)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct timespec tv;
+ uint32_t *batch;
+ unsigned int engine;
+ int all, i, timeout;
+
+ gem_quiescent_gpu(fd);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+
+ obj.relocs_ptr = to_user_pointer(&reloc);
+ obj.relocation_count = 1;
+ memset(&reloc, 0, sizeof(reloc));
+
+ batch = gem_mmap__wc(fd, obj.handle, 0, 4096, PROT_WRITE);
+ gem_set_domain(fd, obj.handle,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+ reloc.target_handle = obj.handle; /* recurse */
+ reloc.presumed_offset = 0;
+ reloc.offset = sizeof(uint32_t);
+ reloc.delta = 0;
+ reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
+ reloc.write_domain = 0;
+
+ i = 0;
+ batch[i] = MI_BATCH_BUFFER_START;
+ if (gen >= 8) {
+ batch[i] |= 1 << 8 | 1;
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else if (gen >= 6) {
+ batch[i] |= 1 << 8;
+ batch[++i] = 0;
+ } else {
+ batch[i] |= 2 << 6;
+ batch[++i] = 0;
+ if (gen < 4) {
+ batch[i] |= 1;
+ reloc.delta = 1;
+ }
+ }
+ i++;
+
+ all = -1;
+ for_each_engine(fd, engine) {
+ int fence, new;
+
+ execbuf.flags = engine | LOCAL_EXEC_FENCE_OUT;
+ execbuf.rsvd2 = -1;
+ gem_execbuf_wr(fd, &execbuf);
+ fence = execbuf.rsvd2 >> 32;
+ igt_assert(fence != -1);
+
+ if (all < 0) {
+ all = fence;
+			continue;
+ }
+
+ new = sync_fence_merge(all, fence);
+ igt_assert_lte(0, new);
+ close(all);
+ close(fence);
+
+ all = new;
+ }
+
+ igt_assert(gem_bo_busy(fd, obj.handle));
+ igt_assert(fence_busy(all));
+
+ timeout = 120;
+ if ((flags & HANG) == 0) {
+ *batch = MI_BATCH_BUFFER_END;
+ __sync_synchronize();
+ timeout = 1;
+ }
+ munmap(batch, 4096);
+
+ if (flags & WAIT) {
+ struct pollfd pfd = { .fd = all, .events = POLLIN };
+ igt_assert(poll(&pfd, 1, timeout*1000) == 1);
+ } else {
+ memset(&tv, 0, sizeof(tv));
+ while (fence_busy(all))
+ igt_assert(igt_seconds_elapsed(&tv) < timeout);
+ }
+
+ igt_assert(!gem_bo_busy(fd, obj.handle));
+ igt_assert_eq(sync_fence_status(all),
+ flags & HANG ? -EIO : SYNC_FENCE_OK);
+
+ close(all);
+ gem_close(fd, obj.handle);
+
+ gem_quiescent_gpu(fd);
+}
+
+static void test_fence_await(int fd, unsigned ring, unsigned flags)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t scratch = gem_create(fd, 4096);
+ uint32_t *batch, *out;
+ unsigned engine;
+ int fence, i;
+
+ igt_require(gem_can_store_dword(fd, 0));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = ring | LOCAL_EXEC_FENCE_OUT;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+
+ obj.relocs_ptr = to_user_pointer(&reloc);
+ obj.relocation_count = 1;
+ memset(&reloc, 0, sizeof(reloc));
+
+ out = gem_mmap__wc(fd, scratch, 0, 4096, PROT_WRITE);
+	gem_set_domain(fd, scratch,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+ batch = gem_mmap__wc(fd, obj.handle, 0, 4096, PROT_WRITE);
+ gem_set_domain(fd, obj.handle,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+ reloc.target_handle = obj.handle; /* recurse */
+ reloc.presumed_offset = 0;
+ reloc.offset = sizeof(uint32_t);
+ reloc.delta = 0;
+ reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
+ reloc.write_domain = 0;
+
+ i = 0;
+ batch[i] = MI_BATCH_BUFFER_START;
+ if (gen >= 8) {
+ batch[i] |= 1 << 8 | 1;
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else if (gen >= 6) {
+ batch[i] |= 1 << 8;
+ batch[++i] = 0;
+ } else {
+ batch[i] |= 2 << 6;
+ batch[++i] = 0;
+ if (gen < 4) {
+ batch[i] |= 1;
+ reloc.delta = 1;
+ }
+ }
+ i++;
+
+ execbuf.rsvd2 = -1;
+ gem_execbuf_wr(fd, &execbuf);
+ gem_close(fd, obj.handle);
+ fence = execbuf.rsvd2 >> 32;
+ igt_assert(fence != -1);
+
+ i = 0;
+ for_each_physical_engine(fd, engine) {
+ if (!gem_can_store_dword(fd, engine))
+ continue;
+
+ if (flags & NONBLOCK) {
+ store(fd, engine, fence, scratch, i);
+ } else {
+ igt_fork(child, 1)
+ store(fd, engine, fence, scratch, i);
+ }
+
+ i++;
+ }
+ close(fence);
+
+ sleep(1);
+
+	/* Check that none of the stores completed prematurely */
+ for (int n = 0; n < i; n++)
+ igt_assert_eq_u32(out[n], 0);
+
+ if ((flags & HANG) == 0) {
+ *batch = MI_BATCH_BUFFER_END;
+ __sync_synchronize();
+ }
+ munmap(batch, 4096);
+
+ igt_waitchildren();
+
+ gem_set_domain(fd, scratch, I915_GEM_DOMAIN_GTT, 0);
+ while (i--)
+ igt_assert_eq_u32(out[i], i);
+ munmap(out, 4096);
+ gem_close(fd, scratch);
+}
+
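+/* Requeue the same batch several times to pad the ring behind the plug. */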
+static void resubmit(int fd, uint32_t handle, unsigned int ring, int count)
+{
+ struct drm_i915_gem_exec_object2 obj = { .handle = handle };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .flags = ring,
+ };
+ while (count--)
+ gem_execbuf(fd, &execbuf);
+}
+
+static void alarm_handler(int sig)
+{
+}
+
+static int __execbuf(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
+{
+ int err;
+
+ err = 0;
+ if (ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf))
+ err = -errno;
+
+ errno = 0;
+ return err;
+}
+
+static void test_parallel(int fd, unsigned int master)
+{
+ const int SCRATCH = 0;
+ const int BATCH = 1;
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t scratch = gem_create(fd, 4096);
+ uint32_t *out = gem_mmap__wc(fd, scratch, 0, 4096, PROT_READ);
+ uint32_t handle[16];
+ uint32_t batch[16];
+ igt_spin_t *spin;
+ unsigned engine;
+ IGT_CORK_HANDLE(c);
+ uint32_t plug;
+ int i, x = 0;
+
+ plug = igt_cork_plug(&c, fd);
+
+ /* Fill the queue with many requests so that the next one has to
+ * wait before it can be executed by the hardware.
+ */
+ spin = igt_spin_batch_new(fd, .engine = master, .dependency = plug);
+ resubmit(fd, spin->handle, master, 16);
+
+ /* Now queue the master request and its secondaries */
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags = master | LOCAL_EXEC_FENCE_OUT;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ memset(obj, 0, sizeof(obj));
+ obj[SCRATCH].handle = scratch;
+
+ obj[BATCH].handle = gem_create(fd, 4096);
+ handle[x] = obj[BATCH].handle;
+ obj[BATCH].relocs_ptr = to_user_pointer(&reloc);
+ obj[BATCH].relocation_count = 2;
+ memset(reloc, 0, sizeof(reloc));
+
+ i = 0;
+
+ reloc[0].target_handle = obj[SCRATCH].handle;
+ reloc[0].presumed_offset = -1;
+ reloc[0].offset = sizeof(uint32_t) * (i + 1);
+ reloc[0].delta = sizeof(uint32_t) * x++;
+ reloc[0].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[0].write_domain = 0; /* lies */
+
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = reloc[0].presumed_offset + reloc[0].delta;
+ batch[++i] = (reloc[0].presumed_offset + reloc[0].delta) >> 32;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = reloc[0].presumed_offset + reloc[0].delta;
+ reloc[0].offset += sizeof(uint32_t);
+ } else {
+ batch[i]--;
+ batch[++i] = reloc[0].presumed_offset + reloc[0].delta;
+ }
+ batch[++i] = ~0u ^ x;
+
+ reloc[1].target_handle = obj[BATCH].handle; /* recurse */
+ reloc[1].presumed_offset = 0;
+ reloc[1].offset = sizeof(uint32_t) * (i + 2);
+ reloc[1].delta = 0;
+ reloc[1].read_domains = I915_GEM_DOMAIN_COMMAND;
+ reloc[1].write_domain = 0;
+
+ batch[++i] = MI_BATCH_BUFFER_START;
+ if (gen >= 8) {
+ batch[i] |= 1 << 8 | 1;
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else if (gen >= 6) {
+ batch[i] |= 1 << 8;
+ batch[++i] = 0;
+ } else {
+ batch[i] |= 2 << 6;
+ batch[++i] = 0;
+ if (gen < 4) {
+ batch[i] |= 1;
+ reloc[1].delta = 1;
+ }
+ }
+ batch[++i] = MI_BATCH_BUFFER_END;
+ igt_assert(i < sizeof(batch)/sizeof(batch[0]));
+ gem_write(fd, obj[BATCH].handle, 0, batch, sizeof(batch));
+ gem_execbuf_wr(fd, &execbuf);
+
+ igt_assert(execbuf.rsvd2);
+ execbuf.rsvd2 >>= 32; /* out fence -> in fence */
+ obj[BATCH].relocation_count = 1;
+
+ /* Queue all secondaries */
+ for_each_physical_engine(fd, engine) {
+ if (engine == master)
+ continue;
+
+ execbuf.flags = engine | LOCAL_EXEC_FENCE_SUBMIT;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ obj[BATCH].handle = gem_create(fd, 4096);
+ handle[x] = obj[BATCH].handle;
+
+ i = 0;
+ reloc[0].delta = sizeof(uint32_t) * x++;
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = reloc[0].presumed_offset + reloc[0].delta;
+ batch[++i] = (reloc[0].presumed_offset + reloc[0].delta) >> 32;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = reloc[0].presumed_offset + reloc[0].delta;
+ } else {
+ batch[i]--;
+ batch[++i] = reloc[0].presumed_offset + reloc[0].delta;
+ }
+ batch[++i] = ~0u ^ x;
+ batch[++i] = MI_BATCH_BUFFER_END;
+ gem_write(fd, obj[BATCH].handle, 0, batch, sizeof(batch));
+ gem_execbuf(fd, &execbuf);
+ }
+ igt_assert(gem_bo_busy(fd, spin->handle));
+ close(execbuf.rsvd2);
+
+ /* No secondary should be executed since master is stalled. If there
+ * was no dependency chain at all, the secondaries would start
+ * immediately.
+ */
+ for (i = 0; i < x; i++) {
+ igt_assert_eq_u32(out[i], 0);
+ igt_assert(gem_bo_busy(fd, handle[i]));
+ }
+
+ /* Unblock the master */
+ igt_cork_unplug(&c);
+ gem_close(fd, plug);
+ igt_spin_batch_end(spin);
+
+ /* Wait for all secondaries to complete. If we used a regular fence
+ * then the secondaries would not start until the master was complete.
+ * In this case that can only happen with a GPU reset, and so we run
+ * under the hang detector and double check that the master is still
+ * running afterwards.
+ */
+ for (i = 1; i < x; i++) {
+ while (gem_bo_busy(fd, handle[i]))
+ sleep(0);
+
+ igt_assert_f(out[i], "Missing output from engine %d\n", i);
+ gem_close(fd, handle[i]);
+ }
+ munmap(out, 4096);
+ gem_close(fd, obj[SCRATCH].handle);
+
+ /* Master should still be spinning, but all output should be written */
+ igt_assert(gem_bo_busy(fd, handle[0]));
+ out = gem_mmap__wc(fd, handle[0], 0, 4096, PROT_WRITE);
+ out[0] = MI_BATCH_BUFFER_END;
+ munmap(out, 4096);
+ gem_close(fd, handle[0]);
+}
+
+static uint32_t batch_create(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ uint32_t handle;
+
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, &bbe, sizeof(bbe));
+
+ return handle;
+}
+
+static inline uint32_t lower_32_bits(uint64_t x)
+{
+ return x & 0xffffffff;
+}
+
+static inline uint32_t upper_32_bits(uint64_t x)
+{
+ return x >> 32;
+}
+
+static void test_keep_in_fence(int fd, unsigned int engine, unsigned int flags)
+{
+ struct sigaction sa = { .sa_handler = alarm_handler };
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = batch_create(fd),
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .flags = engine | LOCAL_EXEC_FENCE_OUT,
+ };
+ unsigned long count, last;
+ struct itimerval itv;
+ igt_spin_t *spin;
+ int fence;
+
+ spin = igt_spin_batch_new(fd, .engine = engine);
+
+ gem_execbuf_wr(fd, &execbuf);
+ fence = upper_32_bits(execbuf.rsvd2);
+
+ sigaction(SIGALRM, &sa, NULL);
+ itv.it_interval.tv_sec = 0;
+ itv.it_interval.tv_usec = 1000;
+ itv.it_value.tv_sec = 0;
+ itv.it_value.tv_usec = 10000;
+ setitimer(ITIMER_REAL, &itv, NULL);
+
+ execbuf.flags |= LOCAL_EXEC_FENCE_IN;
+ execbuf.rsvd2 = fence;
+
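+	/*
+	 * Keep resubmitting with the same in-fence while the itimer above
+	 * keeps interrupting the ioctl; on -EINTR the kernel must leave the
+	 * passed-in fence fd untouched and report no out-fence.
+	 */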
+ last = -1;
+ count = 0;
+ do {
+ int err = __execbuf(fd, &execbuf);
+
+ igt_assert_eq(lower_32_bits(execbuf.rsvd2), fence);
+
+ if (err == 0) {
+ close(fence);
+
+ fence = upper_32_bits(execbuf.rsvd2);
+ execbuf.rsvd2 = fence;
+
+ count++;
+ continue;
+ }
+
+ igt_assert_eq(err, -EINTR);
+ igt_assert_eq(upper_32_bits(execbuf.rsvd2), 0);
+
+ if (last == count)
+ break;
+
+ last = count;
+ } while (1);
+
+ memset(&itv, 0, sizeof(itv));
+ setitimer(ITIMER_REAL, &itv, NULL);
+
+ gem_close(fd, obj.handle);
+ close(fence);
+
+ igt_spin_batch_free(fd, spin);
+ gem_quiescent_gpu(fd);
+}
+
+#define EXPIRED 0x10000
+static void test_long_history(int fd, long ring_size, unsigned flags)
+{
+ const uint32_t sz = 1 << 20;
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ unsigned int engines[16], engine;
+ unsigned int nengine, n, s;
+ unsigned long limit;
+ int all_fences;
+ IGT_CORK_HANDLE(c);
+
+ limit = -1;
+ if (!gem_uses_full_ppgtt(fd))
+ limit = ring_size / 3;
+
+ nengine = 0;
+ for_each_physical_engine(fd, engine)
+ engines[nengine++] = engine;
+ igt_require(nengine);
+
+ gem_quiescent_gpu(fd);
+
+ memset(obj, 0, sizeof(obj));
+ obj[1].handle = gem_create(fd, sz);
+ gem_write(fd, obj[1].handle, sz - sizeof(bbe), &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj[1]);
+ execbuf.buffer_count = 1;
+ execbuf.flags = LOCAL_EXEC_FENCE_OUT;
+
+ gem_execbuf_wr(fd, &execbuf);
+ all_fences = execbuf.rsvd2 >> 32;
+
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+
+ obj[0].handle = igt_cork_plug(&c, fd);
+
+ igt_until_timeout(5) {
+ execbuf.rsvd1 = gem_context_create(fd);
+
+ for (n = 0; n < nengine; n++) {
+ struct sync_merge_data merge;
+
+ execbuf.flags = engines[n] | LOCAL_EXEC_FENCE_OUT;
+ if (__gem_execbuf_wr(fd, &execbuf))
+ continue;
+
+ memset(&merge, 0, sizeof(merge));
+ merge.fd2 = execbuf.rsvd2 >> 32;
+ strcpy(merge.name, "igt");
+
+ do_ioctl(all_fences, SYNC_IOC_MERGE, &merge);
+
+ close(all_fences);
+ close(merge.fd2);
+
+ all_fences = merge.fence;
+ }
+
+ gem_context_destroy(fd, execbuf.rsvd1);
+ if (!--limit)
+ break;
+ }
+ igt_cork_unplug(&c);
+
+ igt_info("History depth = %d\n", sync_fence_count(all_fences));
+
+ if (flags & EXPIRED)
+ gem_sync(fd, obj[1].handle);
+
+ execbuf.buffers_ptr = to_user_pointer(&obj[1]);
+ execbuf.buffer_count = 1;
+ execbuf.rsvd2 = all_fences;
+ execbuf.rsvd1 = 0;
+
+ for (s = 0; s < ring_size; s++) {
+ for (n = 0; n < nengine; n++) {
+ execbuf.flags = engines[n] | LOCAL_EXEC_FENCE_IN;
+ if (__gem_execbuf_wr(fd, &execbuf))
+ continue;
+ }
+ }
+
+ close(all_fences);
+
+ gem_sync(fd, obj[1].handle);
+ gem_close(fd, obj[1].handle);
+ gem_close(fd, obj[0].handle);
+}
+
+static void test_fence_flip(int i915)
+{
+ igt_skip_on_f(1, "no fence-in for atomic flips\n");
+}
+
+static bool has_submit_fence(int fd)
+{
+ struct drm_i915_getparam gp;
+ int value = 0;
+
+ memset(&gp, 0, sizeof(gp));
+ gp.param = 0xdeadbeef ^ 51; /* I915_PARAM_HAS_EXEC_SUBMIT_FENCE */
+ gp.value = &value;
+
+ ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp, sizeof(gp));
+ errno = 0;
+
+ return value;
+}
+
+static bool has_syncobj(int fd)
+{
+	struct drm_get_cap cap = { .capability = 0x13 }; /* DRM_CAP_SYNCOBJ */
+ ioctl(fd, DRM_IOCTL_GET_CAP, &cap);
+ return cap.value;
+}
+
+static bool exec_has_fence_array(int fd)
+{
+ struct drm_i915_getparam gp;
+ int value = 0;
+
+ memset(&gp, 0, sizeof(gp));
+ gp.param = 49; /* I915_PARAM_HAS_EXEC_FENCE_ARRAY */
+ gp.value = &value;
+
+ ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp, sizeof(gp));
+ errno = 0;
+
+ return value;
+}
+
+static void test_invalid_fence_array(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ struct local_gem_exec_fence fence;
+ void *ptr;
+
+ /* create an otherwise valid execbuf */
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ gem_execbuf(fd, &execbuf);
+
+ execbuf.flags |= LOCAL_EXEC_FENCE_ARRAY;
+ gem_execbuf(fd, &execbuf);
+
+ /* Now add a few invalid fence-array pointers */
+ if (sizeof(execbuf.num_cliprects) == sizeof(size_t)) {
+ execbuf.num_cliprects = -1;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+ }
+
+ execbuf.num_cliprects = 1;
+ execbuf.cliprects_ptr = -1;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT);
+
+ memset(&fence, 0, sizeof(fence));
+ execbuf.cliprects_ptr = to_user_pointer(&fence);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT);
+
+ ptr = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(ptr != MAP_FAILED);
+ execbuf.cliprects_ptr = to_user_pointer(ptr);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT);
+
+ do_or_die(mprotect(ptr, 4096, PROT_READ));
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -ENOENT);
+
+ do_or_die(mprotect(ptr, 4096, PROT_NONE));
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT);
+
+ munmap(ptr, 4096);
+}
+
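+/*
+ * The LOCAL_IOCTL_SYNCOBJ_* definitions below mirror the DRM syncobj ioctls
+ * (create/destroy/handle-to-fd/fd-to-handle) for headers that predate them.
+ */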
+static uint32_t __syncobj_create(int fd)
+{
+ struct local_syncobj_create {
+ uint32_t handle, flags;
+ } arg;
+#define LOCAL_IOCTL_SYNCOBJ_CREATE DRM_IOWR(0xBF, struct local_syncobj_create)
+
+ memset(&arg, 0, sizeof(arg));
+ igt_ioctl(fd, LOCAL_IOCTL_SYNCOBJ_CREATE, &arg);
+
+ return arg.handle;
+}
+
+static uint32_t syncobj_create(int fd)
+{
+ uint32_t ret;
+
+ igt_assert_neq((ret = __syncobj_create(fd)), 0);
+
+ return ret;
+}
+
+static int __syncobj_destroy(int fd, uint32_t handle)
+{
+ struct local_syncobj_destroy {
+ uint32_t handle, flags;
+ } arg;
+#define LOCAL_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct local_syncobj_destroy)
+ int err = 0;
+
+ memset(&arg, 0, sizeof(arg));
+ arg.handle = handle;
+ if (igt_ioctl(fd, LOCAL_IOCTL_SYNCOBJ_DESTROY, &arg))
+ err = -errno;
+
+ errno = 0;
+ return err;
+}
+
+static void syncobj_destroy(int fd, uint32_t handle)
+{
+ igt_assert_eq(__syncobj_destroy(fd, handle), 0);
+}
+
+static int __syncobj_to_sync_file(int fd, uint32_t handle)
+{
+ struct local_syncobj_handle {
+ uint32_t handle;
+ uint32_t flags;
+ int32_t fd;
+ uint32_t pad;
+ } arg;
+#define LOCAL_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct local_syncobj_handle)
+
+ memset(&arg, 0, sizeof(arg));
+ arg.handle = handle;
+ arg.flags = 1 << 0; /* EXPORT_SYNC_FILE */
+ if (igt_ioctl(fd, LOCAL_IOCTL_SYNCOBJ_HANDLE_TO_FD, &arg))
+ arg.fd = -errno;
+
+ errno = 0;
+ return arg.fd;
+}
+
+static int syncobj_to_sync_file(int fd, uint32_t handle)
+{
+ int ret;
+
+ igt_assert_lte(0, (ret = __syncobj_to_sync_file(fd, handle)));
+
+ return ret;
+}
+
+static int __syncobj_from_sync_file(int fd, uint32_t handle, int sf)
+{
+ struct local_syncobj_handle {
+ uint32_t handle;
+ uint32_t flags;
+ int32_t fd;
+ uint32_t pad;
+ } arg;
+#define LOCAL_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct local_syncobj_handle)
+ int err = 0;
+
+ memset(&arg, 0, sizeof(arg));
+ arg.handle = handle;
+ arg.fd = sf;
+ arg.flags = 1 << 0; /* IMPORT_SYNC_FILE */
+ if (igt_ioctl(fd, LOCAL_IOCTL_SYNCOBJ_FD_TO_HANDLE, &arg))
+ err = -errno;
+
+ errno = 0;
+ return err;
+}
+
+static void syncobj_from_sync_file(int fd, uint32_t handle, int sf)
+{
+ igt_assert_eq(__syncobj_from_sync_file(fd, handle, sf), 0);
+}
+
+static int __syncobj_export(int fd, uint32_t handle, int *syncobj)
+{
+ struct local_syncobj_handle {
+ uint32_t handle;
+ uint32_t flags;
+ int32_t fd;
+ uint32_t pad;
+ } arg;
+ int err;
+
+ memset(&arg, 0, sizeof(arg));
+ arg.handle = handle;
+
+ err = 0;
+ if (igt_ioctl(fd, LOCAL_IOCTL_SYNCOBJ_HANDLE_TO_FD, &arg))
+ err = -errno;
+
+ errno = 0;
+ *syncobj = arg.fd;
+ return err;
+}
+
+static int syncobj_export(int fd, uint32_t handle)
+{
+ int syncobj;
+
+ igt_assert_eq(__syncobj_export(fd, handle, &syncobj), 0);
+
+ return syncobj;
+}
+
+static int __syncobj_import(int fd, int syncobj, uint32_t *handle)
+{
+ struct local_syncobj_handle {
+ uint32_t handle;
+ uint32_t flags;
+ int32_t fd;
+ uint32_t pad;
+ } arg;
+#define LOCAL_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct local_syncobj_handle)
+ int err;
+
+ memset(&arg, 0, sizeof(arg));
+ arg.fd = syncobj;
+
+ err = 0;
+ if (igt_ioctl(fd, LOCAL_IOCTL_SYNCOBJ_FD_TO_HANDLE, &arg))
+ err = -errno;
+
+ errno = 0;
+ *handle = arg.handle;
+ return err;
+}
+
+static uint32_t syncobj_import(int fd, int syncobj)
+{
+ uint32_t handle;
+
+ igt_assert_eq(__syncobj_import(fd, syncobj, &handle), 0);
+
+ return handle;
+}
+
+static bool syncobj_busy(int fd, uint32_t handle)
+{
+ bool result;
+ int sf;
+
+ sf = syncobj_to_sync_file(fd, handle);
+ result = poll(&(struct pollfd){sf, POLLIN}, 1, 0) == 0;
+ close(sf);
+
+ return result;
+}
+
+static void test_syncobj_unused_fence(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct local_gem_exec_fence fence = {
+ .handle = syncobj_create(fd),
+ };
+ igt_spin_t *spin = igt_spin_batch_new(fd);
+
+ /* sanity check our syncobj_to_sync_file interface */
+ igt_assert_eq(__syncobj_to_sync_file(fd, 0), -ENOENT);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = LOCAL_EXEC_FENCE_ARRAY;
+ execbuf.cliprects_ptr = to_user_pointer(&fence);
+ execbuf.num_cliprects = 1;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ gem_execbuf(fd, &execbuf);
+
+ /* no flags, the fence isn't created */
+ igt_assert_eq(__syncobj_to_sync_file(fd, fence.handle), -EINVAL);
+ igt_assert(gem_bo_busy(fd, obj.handle));
+
+ gem_close(fd, obj.handle);
+ syncobj_destroy(fd, fence.handle);
+
+ igt_spin_batch_free(fd, spin);
+}
+
+static void test_syncobj_invalid_wait(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct local_gem_exec_fence fence = {
+ .handle = syncobj_create(fd),
+ };
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = LOCAL_EXEC_FENCE_ARRAY;
+ execbuf.cliprects_ptr = to_user_pointer(&fence);
+ execbuf.num_cliprects = 1;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ /* waiting before the fence is set is invalid */
+ fence.flags = LOCAL_EXEC_FENCE_WAIT;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+
+ gem_close(fd, obj.handle);
+ syncobj_destroy(fd, fence.handle);
+}
+
+static void test_syncobj_invalid_flags(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct local_gem_exec_fence fence = {
+ .handle = syncobj_create(fd),
+ };
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = LOCAL_EXEC_FENCE_ARRAY;
+ execbuf.cliprects_ptr = to_user_pointer(&fence);
+ execbuf.num_cliprects = 1;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ /* set all flags to hit an invalid one */
+ fence.flags = ~0;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+
+ gem_close(fd, obj.handle);
+ syncobj_destroy(fd, fence.handle);
+}
+
+static void test_syncobj_signal(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct local_gem_exec_fence fence = {
+ .handle = syncobj_create(fd),
+ };
+ igt_spin_t *spin = igt_spin_batch_new(fd);
+
+ /* Check that the syncobj is signaled only when our request/fence is */
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = LOCAL_EXEC_FENCE_ARRAY;
+ execbuf.cliprects_ptr = to_user_pointer(&fence);
+ execbuf.num_cliprects = 1;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ fence.flags = LOCAL_EXEC_FENCE_SIGNAL;
+ gem_execbuf(fd, &execbuf);
+
+ igt_assert(gem_bo_busy(fd, obj.handle));
+ igt_assert(syncobj_busy(fd, fence.handle));
+
+ igt_spin_batch_free(fd, spin);
+
+ gem_sync(fd, obj.handle);
+ igt_assert(!gem_bo_busy(fd, obj.handle));
+ igt_assert(!syncobj_busy(fd, fence.handle));
+
+ gem_close(fd, obj.handle);
+ syncobj_destroy(fd, fence.handle);
+}
+
+static void test_syncobj_wait(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct local_gem_exec_fence fence = {
+ .handle = syncobj_create(fd),
+ };
+ igt_spin_t *spin;
+ unsigned engine;
+ unsigned handle[16];
+ int n;
+
+	/* Check that we can use the syncobj to wait asynchronously prior to
+	 * execution.
+	 */
+
+ gem_quiescent_gpu(fd);
+
+ spin = igt_spin_batch_new(fd);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ /* Queue a signaler from the blocked engine */
+ execbuf.flags = LOCAL_EXEC_FENCE_ARRAY;
+ execbuf.cliprects_ptr = to_user_pointer(&fence);
+ execbuf.num_cliprects = 1;
+ fence.flags = LOCAL_EXEC_FENCE_SIGNAL;
+ gem_execbuf(fd, &execbuf);
+ igt_assert(gem_bo_busy(fd, spin->handle));
+
+ gem_close(fd, obj.handle);
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ n = 0;
+ for_each_engine(fd, engine) {
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ /* No inter-engine synchronisation, will complete */
+ if (engine == I915_EXEC_BLT) {
+ execbuf.flags = engine;
+ execbuf.cliprects_ptr = 0;
+ execbuf.num_cliprects = 0;
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, obj.handle);
+ igt_assert(gem_bo_busy(fd, spin->handle));
+ }
+ igt_assert(gem_bo_busy(fd, spin->handle));
+
+ /* Now wait upon the blocked engine */
+ execbuf.flags = LOCAL_EXEC_FENCE_ARRAY | engine;
+ execbuf.cliprects_ptr = to_user_pointer(&fence);
+ execbuf.num_cliprects = 1;
+ fence.flags = LOCAL_EXEC_FENCE_WAIT;
+ gem_execbuf(fd, &execbuf);
+
+ igt_assert(gem_bo_busy(fd, obj.handle));
+ handle[n++] = obj.handle;
+ }
+ syncobj_destroy(fd, fence.handle);
+
+ for (int i = 0; i < n; i++)
+ igt_assert(gem_bo_busy(fd, handle[i]));
+
+ igt_spin_batch_free(fd, spin);
+
+ for (int i = 0; i < n; i++) {
+ gem_sync(fd, handle[i]);
+ gem_close(fd, handle[i]);
+ }
+}
+
+static void test_syncobj_export(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct local_gem_exec_fence fence = {
+ .handle = syncobj_create(fd),
+ };
+ int export[2];
+ igt_spin_t *spin = igt_spin_batch_new(fd);
+
+ /* Check that if we export the syncobj prior to use it picks up
+ * the later fence. This allows a syncobj to establish a channel
+ * between clients that may be updated to a later fence by either
+ * end.
+ */
+ for (int n = 0; n < ARRAY_SIZE(export); n++)
+ export[n] = syncobj_export(fd, fence.handle);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = LOCAL_EXEC_FENCE_ARRAY;
+ execbuf.cliprects_ptr = to_user_pointer(&fence);
+ execbuf.num_cliprects = 1;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ fence.flags = LOCAL_EXEC_FENCE_SIGNAL;
+ gem_execbuf(fd, &execbuf);
+
+ igt_assert(syncobj_busy(fd, fence.handle));
+ igt_assert(gem_bo_busy(fd, obj.handle));
+
+ for (int n = 0; n < ARRAY_SIZE(export); n++) {
+ uint32_t import = syncobj_import(fd, export[n]);
+ igt_assert(syncobj_busy(fd, import));
+ syncobj_destroy(fd, import);
+ }
+
+ igt_spin_batch_free(fd, spin);
+
+ gem_sync(fd, obj.handle);
+ igt_assert(!gem_bo_busy(fd, obj.handle));
+ igt_assert(!syncobj_busy(fd, fence.handle));
+
+ gem_close(fd, obj.handle);
+ syncobj_destroy(fd, fence.handle);
+
+ for (int n = 0; n < ARRAY_SIZE(export); n++) {
+ uint32_t import = syncobj_import(fd, export[n]);
+ igt_assert(!syncobj_busy(fd, import));
+ syncobj_destroy(fd, import);
+ close(export[n]);
+ }
+}
+
+static void test_syncobj_repeat(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ const unsigned nfences = 4096;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct local_gem_exec_fence *fence;
+ int export;
+ igt_spin_t *spin = igt_spin_batch_new(fd);
+
+ /* Check that we can wait on the same fence multiple times */
+ fence = calloc(nfences, sizeof(*fence));
+ fence->handle = syncobj_create(fd);
+ export = syncobj_export(fd, fence->handle);
+ for (int i = 1; i < nfences; i++)
+ fence[i].handle = syncobj_import(fd, export);
+ close(export);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = LOCAL_EXEC_FENCE_ARRAY;
+ execbuf.cliprects_ptr = to_user_pointer(fence);
+ execbuf.num_cliprects = nfences;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ for (int i = 0; i < nfences; i++)
+ fence[i].flags = LOCAL_EXEC_FENCE_SIGNAL;
+
+ gem_execbuf(fd, &execbuf);
+
+ for (int i = 0; i < nfences; i++) {
+ igt_assert(syncobj_busy(fd, fence[i].handle));
+ fence[i].flags |= LOCAL_EXEC_FENCE_WAIT;
+ }
+ igt_assert(gem_bo_busy(fd, obj.handle));
+
+ gem_execbuf(fd, &execbuf);
+
+ for (int i = 0; i < nfences; i++)
+ igt_assert(syncobj_busy(fd, fence[i].handle));
+ igt_assert(gem_bo_busy(fd, obj.handle));
+
+ igt_spin_batch_free(fd, spin);
+
+ gem_sync(fd, obj.handle);
+ gem_close(fd, obj.handle);
+
+ for (int i = 0; i < nfences; i++) {
+ igt_assert(!syncobj_busy(fd, fence[i].handle));
+ syncobj_destroy(fd, fence[i].handle);
+ }
+ free(fence);
+}
+
+static void test_syncobj_import(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ igt_spin_t *spin = igt_spin_batch_new(fd);
+ uint32_t sync = syncobj_create(fd);
+ int fence;
+
+ /* Check that we can create a syncobj from an explicit fence (which
+ * uses sync_file) and that it acts just like a regular fence.
+ */
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = LOCAL_EXEC_FENCE_OUT;
+ execbuf.rsvd2 = -1;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ gem_execbuf_wr(fd, &execbuf);
+
+ fence = execbuf.rsvd2 >> 32;
+ igt_assert(fence_busy(fence));
+ syncobj_from_sync_file(fd, sync, fence);
+ close(fence);
+
+ igt_assert(gem_bo_busy(fd, obj.handle));
+ igt_assert(syncobj_busy(fd, sync));
+
+ igt_spin_batch_free(fd, spin);
+
+ gem_sync(fd, obj.handle);
+ igt_assert(!gem_bo_busy(fd, obj.handle));
+ igt_assert(!syncobj_busy(fd, sync));
+
+ gem_close(fd, obj.handle);
+ syncobj_destroy(fd, sync);
+}
+
+static void test_syncobj_channel(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ unsigned int *control;
+ int syncobj[3];
+
+ /* Create a pair of channels (like a pipe) between two clients
+ * and try to create races on the syncobj.
+ */
+
+ control = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(control != MAP_FAILED);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = LOCAL_EXEC_FENCE_OUT;
+ execbuf.rsvd2 = -1;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ for (int i = 0; i < ARRAY_SIZE(syncobj); i++) {
+ struct local_gem_exec_fence fence;
+
+ execbuf.flags = LOCAL_EXEC_FENCE_ARRAY;
+ execbuf.cliprects_ptr = to_user_pointer(&fence);
+ execbuf.num_cliprects = 1;
+
+ /* Create a primed fence */
+ fence.handle = syncobj_create(fd);
+ fence.flags = LOCAL_EXEC_FENCE_SIGNAL;
+
+ gem_execbuf(fd, &execbuf);
+
+ syncobj[i] = fence.handle;
+ }
+
+ /* Two processes in ping-pong unison (pipe), one out of sync */
+ igt_fork(child, 1) {
+ struct local_gem_exec_fence fence[3];
+ unsigned long count;
+
+ execbuf.flags = LOCAL_EXEC_FENCE_ARRAY;
+ execbuf.cliprects_ptr = to_user_pointer(fence);
+ execbuf.num_cliprects = 3;
+
+ fence[0].handle = syncobj[0];
+ fence[0].flags = LOCAL_EXEC_FENCE_SIGNAL;
+
+ fence[1].handle = syncobj[1];
+ fence[1].flags = LOCAL_EXEC_FENCE_WAIT;
+
+ fence[2].handle = syncobj[2];
+ fence[2].flags = LOCAL_EXEC_FENCE_WAIT;
+
+ count = 0;
+ while (!*(volatile unsigned *)control) {
+ gem_execbuf(fd, &execbuf);
+ count++;
+ }
+
+ control[1] = count;
+ }
+ igt_fork(child, 1) {
+ struct local_gem_exec_fence fence[3];
+ unsigned long count;
+
+ execbuf.flags = LOCAL_EXEC_FENCE_ARRAY;
+ execbuf.cliprects_ptr = to_user_pointer(fence);
+ execbuf.num_cliprects = 3;
+
+ fence[0].handle = syncobj[0];
+ fence[0].flags = LOCAL_EXEC_FENCE_WAIT;
+
+ fence[1].handle = syncobj[1];
+ fence[1].flags = LOCAL_EXEC_FENCE_SIGNAL;
+
+ fence[2].handle = syncobj[2];
+ fence[2].flags = LOCAL_EXEC_FENCE_WAIT;
+
+ count = 0;
+ while (!*(volatile unsigned *)control) {
+ gem_execbuf(fd, &execbuf);
+ count++;
+ }
+ control[2] = count;
+ }
+ igt_fork(child, 1) {
+ struct local_gem_exec_fence fence;
+ unsigned long count;
+
+ execbuf.flags = LOCAL_EXEC_FENCE_ARRAY;
+ execbuf.cliprects_ptr = to_user_pointer(&fence);
+ execbuf.num_cliprects = 1;
+
+ fence.handle = syncobj[2];
+ fence.flags = LOCAL_EXEC_FENCE_SIGNAL;
+
+ count = 0;
+ while (!*(volatile unsigned *)control) {
+ gem_execbuf(fd, &execbuf);
+ count++;
+ }
+ control[3] = count;
+ }
+
+ sleep(1);
+ *control = 1;
+ igt_waitchildren();
+
+ igt_info("Pipe=[%u, %u], gooseberry=%u\n",
+ control[1], control[2], control[3]);
+ munmap(control, 4096);
+
+ gem_sync(fd, obj.handle);
+ gem_close(fd, obj.handle);
+
+ for (int i = 0; i < ARRAY_SIZE(syncobj); i++)
+ syncobj_destroy(fd, syncobj[i]);
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ int i915 = -1;
+
+ igt_fixture {
+ i915 = drm_open_driver_master(DRIVER_INTEL);
+ igt_require_gem(i915);
+ igt_require(gem_has_exec_fence(i915));
+ gem_require_mmap_wc(i915);
+
+ gem_submission_print_method(i915);
+ }
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_fork_hang_detector(i915);
+ }
+
+ igt_subtest("basic-busy-all")
+ test_fence_busy_all(i915, 0);
+ igt_subtest("basic-wait-all")
+ test_fence_busy_all(i915, WAIT);
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ }
+
+ igt_subtest("busy-hang-all")
+ test_fence_busy_all(i915, HANG);
+ igt_subtest("wait-hang-all")
+ test_fence_busy_all(i915, WAIT | HANG);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(gem_has_ring(i915, e->exec_id | e->flags));
+ igt_require(gem_can_store_dword(i915, e->exec_id | e->flags));
+ }
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_fork_hang_detector(i915);
+ }
+
+ igt_subtest_f("%sbusy-%s",
+ e->exec_id == 0 ? "basic-" : "",
+ e->name)
+ test_fence_busy(i915, e->exec_id | e->flags, 0);
+ igt_subtest_f("%swait-%s",
+ e->exec_id == 0 ? "basic-" : "",
+ e->name)
+ test_fence_busy(i915, e->exec_id | e->flags, WAIT);
+ igt_subtest_f("%sawait-%s",
+ e->exec_id == 0 ? "basic-" : "",
+ e->name)
+ test_fence_await(i915, e->exec_id | e->flags, 0);
+ igt_subtest_f("nb-await-%s", e->name)
+ test_fence_await(i915, e->exec_id | e->flags, NONBLOCK);
+
+ igt_subtest_f("keep-in-fence-%s", e->name)
+ test_keep_in_fence(i915, e->exec_id | e->flags, 0);
+
+ if (e->exec_id &&
+ !(e->exec_id == I915_EXEC_BSD && !e->flags)) {
+ igt_subtest_f("parallel-%s", e->name) {
+ igt_require(has_submit_fence(i915));
+ igt_until_timeout(2)
+ test_parallel(i915, e->exec_id | e->flags);
+ }
+ }
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ }
+ }
+
+ igt_subtest_group {
+ igt_hang_t hang;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ hang = igt_allow_hang(i915, 0, 0);
+ }
+
+ igt_subtest_f("busy-hang-%s", e->name)
+ test_fence_busy(i915, e->exec_id | e->flags, HANG);
+ igt_subtest_f("wait-hang-%s", e->name)
+ test_fence_busy(i915, e->exec_id | e->flags, HANG | WAIT);
+ igt_subtest_f("await-hang-%s", e->name)
+ test_fence_await(i915, e->exec_id | e->flags, HANG);
+ igt_subtest_f("nb-await-hang-%s", e->name)
+ test_fence_await(i915, e->exec_id | e->flags, NONBLOCK | HANG);
+ igt_fixture {
+ igt_disallow_hang(i915, hang);
+ }
+ }
+ }
+ }
+
+ igt_subtest_group {
+ long ring_size = 0;
+
+ igt_fixture {
+ ring_size = gem_measure_ring_inflight(i915, ALL_ENGINES, 0) - 1;
+ igt_info("Ring size: %ld batches\n", ring_size);
+ igt_require(ring_size);
+
+ gem_require_contexts(i915);
+ }
+
+ igt_subtest("long-history")
+ test_long_history(i915, ring_size, 0);
+
+ igt_subtest("expired-history")
+ test_long_history(i915, ring_size, EXPIRED);
+ }
+
+ igt_subtest("flip") {
+ gem_quiescent_gpu(i915);
+ test_fence_flip(i915);
+ }
+
+ igt_subtest_group { /* syncobj */
+ igt_fixture {
+ igt_require(exec_has_fence_array(i915));
+ igt_assert(has_syncobj(i915));
+ igt_fork_hang_detector(i915);
+ }
+
+ igt_subtest("invalid-fence-array")
+ test_invalid_fence_array(i915);
+
+ igt_subtest("syncobj-unused-fence")
+ test_syncobj_unused_fence(i915);
+
+ igt_subtest("syncobj-invalid-wait")
+ test_syncobj_invalid_wait(i915);
+
+ igt_subtest("syncobj-invalid-flags")
+ test_syncobj_invalid_flags(i915);
+
+ igt_subtest("syncobj-signal")
+ test_syncobj_signal(i915);
+
+ igt_subtest("syncobj-wait")
+ test_syncobj_wait(i915);
+
+ igt_subtest("syncobj-export")
+ test_syncobj_export(i915);
+
+ igt_subtest("syncobj-repeat")
+ test_syncobj_repeat(i915);
+
+ igt_subtest("syncobj-import")
+ test_syncobj_import(i915);
+
+ igt_subtest("syncobj-channel")
+ test_syncobj_channel(i915);
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ }
+ }
+
+ igt_fixture {
+ close(i915);
+ }
+}
diff --git a/tests/i915/gem_exec_flush.c b/tests/i915/gem_exec_flush.c
new file mode 100644
index 00000000..f820b2a8
--- /dev/null
+++ b/tests/i915/gem_exec_flush.c
@@ -0,0 +1,691 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <time.h>
+
+#include "igt.h"
+#include "igt_x86.h"
+
+IGT_TEST_DESCRIPTION("Basic check of flushing after batches");
+
+#define UNCACHED 0
+#define COHERENT 1
+#define WC 2
+#define WRITE 4
+#define KERNEL 8
+#define SET_DOMAIN 16
+#define BEFORE 32
+#define INTERRUPTIBLE 64
+#define CMDPARSER 128
+#define BASIC 256
+#define MOVNT 512
+
+#if defined(__x86_64__) && !defined(__clang__)
+#pragma GCC push_options
+#pragma GCC target("sse4.1")
+#include <smmintrin.h>
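+/*
+ * Read back through an SSE4.1 non-temporal load (MOVNTDQA) so the value
+ * seen comes straight from memory rather than the CPU cache. The pragma
+ * confines SSE4.1 code generation to this block and, presumably, noinline
+ * keeps the intrinsic out of callers that may run on CPUs without SSE4.1;
+ * the subtests guard usage behind an igt_require(cpu & SSE4_1) check.
+ */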
+__attribute__((noinline))
+static uint32_t movnt(uint32_t *map, int i)
+{
+ __m128i tmp;
+
+ tmp = _mm_stream_load_si128((__m128i *)map + i/4);
+ switch (i%4) { /* gcc! */
+ default:
+ case 0: return _mm_extract_epi32(tmp, 0);
+ case 1: return _mm_extract_epi32(tmp, 1);
+ case 2: return _mm_extract_epi32(tmp, 2);
+ case 3: return _mm_extract_epi32(tmp, 3);
+ }
+}
+static inline unsigned x86_64_features(void)
+{
+ return igt_x86_features();
+}
+#pragma GCC pop_options
+#else
+static inline unsigned x86_64_features(void)
+{
+ return 0;
+}
+static uint32_t movnt(uint32_t *map, int i)
+{
+ igt_assert(!"reached");
+}
+#endif
+
+static void run(int fd, unsigned ring, int nchild, int timeout,
+ unsigned flags)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+
+ /* The crux of this test is whether writes by the GPU become coherently
+ * visible to the CPU.
+ *
+ * For example, using plain clflush (the simplest and most visible
+ * in terms of function calls / syscalls) we have two tests which
+ * perform:
+ *
+ * USER (0):
+ * execbuf(map[i] = i);
+ * sync();
+ * clflush(&map[i]);
+ * assert(map[i] == i);
+ *
+ * execbuf(map[i] = i ^ ~0);
+ * sync();
+ * clflush(&map[i]);
+ * assert(map[i] == i ^ ~0);
+ *
+ * BEFORE:
+ * clflush(&map[i]);
+ * execbuf(map[i] = i);
+ * sync();
+ * assert(map[i] == i);
+ *
+ * clflush(&map[i]);
+ * execbuf(map[i] = i ^ ~0);
+ * sync();
+ * assert(map[i] == i ^ ~0);
+ *
+ * The assertion here is that the cacheline invalidations are precise
+ * and we have no speculative prefetch that can see the future map[i]
+ * access and bring it ahead of the execution, or accidental cache
+ * pollution by the kernel.
+ */
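+ /*
+ * The other flags select alternative read-back paths exercised in the
+ * main loop below: SET_DOMAIN moves the object back to the CPU/WC domain
+ * via the set-domain ioctl, KERNEL goes through pread/pwrite, and MOVNT
+ * reads with an SSE4.1 streaming load that bypasses the CPU cache.
+ */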
+
+ igt_fork(child, nchild) {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_relocation_entry reloc0[1024];
+ struct drm_i915_gem_relocation_entry reloc1[1024];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ unsigned long cycles = 0;
+ bool snoop = false;
+ uint32_t *ptr;
+ uint32_t *map;
+ int i;
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = gem_create(fd, 4096);
+ obj[0].flags |= EXEC_OBJECT_WRITE;
+
+ if (flags & WC) {
+ igt_assert(flags & COHERENT);
+ map = gem_mmap__wc(fd, obj[0].handle, 0, 4096, PROT_WRITE);
+ gem_set_domain(fd, obj[0].handle,
+ I915_GEM_DOMAIN_WC,
+ I915_GEM_DOMAIN_WC);
+ } else {
+ snoop = flags & COHERENT;
+ gem_set_caching(fd, obj[0].handle, snoop);
+ map = gem_mmap__cpu(fd, obj[0].handle, 0, 4096, PROT_WRITE);
+ gem_set_domain(fd, obj[0].handle,
+ I915_GEM_DOMAIN_CPU,
+ I915_GEM_DOMAIN_CPU);
+ }
+
+ for (i = 0; i < 1024; i++)
+ map[i] = 0xabcdabcd;
+
+ gem_set_domain(fd, obj[0].handle,
+ I915_GEM_DOMAIN_WC,
+ I915_GEM_DOMAIN_WC);
+
+ /* Prepare a mappable binding to prevent pread migrating */
+ if (!snoop) {
+ ptr = gem_mmap__gtt(fd, obj[0].handle, 4096, PROT_READ);
+ igt_assert_eq_u32(ptr[0], 0xabcdabcd);
+ munmap(ptr, 4096);
+ }
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 3;
+ execbuf.flags = ring | (1 << 11) | (1<<12);
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ obj[1].handle = gem_create(fd, 1024*64);
+ obj[2].handle = gem_create(fd, 1024*64);
+ gem_write(fd, obj[2].handle, 0, &bbe, sizeof(bbe));
+ igt_require(__gem_execbuf(fd, &execbuf) == 0);
+
+ obj[1].relocation_count = 1;
+ obj[2].relocation_count = 1;
+
+ ptr = gem_mmap__wc(fd, obj[1].handle, 0, 64*1024,
+ PROT_WRITE | PROT_READ);
+ gem_set_domain(fd, obj[1].handle,
+ I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+
+ memset(reloc0, 0, sizeof(reloc0));
+ for (i = 0; i < 1024; i++) {
+ uint64_t offset;
+ uint32_t *b = &ptr[16 * i];
+
+ reloc0[i].presumed_offset = obj[0].offset;
+ reloc0[i].offset = (b - ptr + 1) * sizeof(*ptr);
+ reloc0[i].delta = i * sizeof(uint32_t);
+ reloc0[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc0[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+
+ offset = obj[0].offset + reloc0[i].delta;
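+ /*
+ * Emit MI_STORE_DWORD_IMM for the target generation: gen8+ takes a
+ * 64-bit address, gen4-7 insert a padding dword before a 32-bit
+ * address (so the reloc offset moves by one dword), and gen2/3 use
+ * the shorter encoding with the address immediately following.
+ */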
+ *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ *b++ = offset;
+ *b++ = offset >> 32;
+ } else if (gen >= 4) {
+ *b++ = 0;
+ *b++ = offset;
+ reloc0[i].offset += sizeof(*ptr);
+ } else {
+ b[-1] -= 1;
+ *b++ = offset;
+ }
+ *b++ = i;
+ *b++ = MI_BATCH_BUFFER_END;
+ }
+ munmap(ptr, 64*1024);
+
+ ptr = gem_mmap__wc(fd, obj[2].handle, 0, 64*1024,
+ PROT_WRITE | PROT_READ);
+ gem_set_domain(fd, obj[2].handle,
+ I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+
+ memset(reloc1, 0, sizeof(reloc1));
+ for (i = 0; i < 1024; i++) {
+ uint64_t offset;
+ uint32_t *b = &ptr[16 * i];
+
+ reloc1[i].presumed_offset = obj[0].offset;
+ reloc1[i].offset = (b - ptr + 1) * sizeof(*ptr);
+ reloc1[i].delta = i * sizeof(uint32_t);
+ reloc1[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc1[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+
+ offset = obj[0].offset + reloc1[i].delta;
+ *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ *b++ = offset;
+ *b++ = offset >> 32;
+ } else if (gen >= 4) {
+ *b++ = 0;
+ *b++ = offset;
+ reloc1[i].offset += sizeof(*ptr);
+ } else {
+ b[-1] -= 1;
+ *b++ = offset;
+ }
+ *b++ = i ^ 0xffffffff;
+ *b++ = MI_BATCH_BUFFER_END;
+ }
+ munmap(ptr, 64*1024);
+
+ igt_until_timeout(timeout) {
+ bool xor = false;
+ int idx = cycles++ % 1024;
+
+ /* Inspect a different cacheline each iteration */
+ i = 16 * (idx % 64) + (idx / 64);
+ obj[1].relocs_ptr = to_user_pointer(&reloc0[i]);
+ obj[2].relocs_ptr = to_user_pointer(&reloc1[i]);
+ igt_assert_eq_u64(reloc0[i].presumed_offset, obj[0].offset);
+ igt_assert_eq_u64(reloc1[i].presumed_offset, obj[0].offset);
+ execbuf.batch_start_offset = 64*i;
+
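+ /*
+ * Run the store twice against the same dword: the first pass (the
+ * obj[1] batch) writes i, the second pass via the xor/goto below
+ * (the obj[2] batch) writes i ^ ~0, so a stale value lingering in
+ * the CPU cache from the previous pass cannot masquerade as a
+ * successful flush.
+ */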
+overwrite:
+ if ((flags & BEFORE) &&
+ !((flags & COHERENT) || gem_has_llc(fd)))
+ igt_clflush_range(&map[i], sizeof(map[i]));
+
+ execbuf.buffer_count = 2 + xor;
+ gem_execbuf(fd, &execbuf);
+
+ if (flags & SET_DOMAIN) {
+ unsigned domain = flags & WC ? I915_GEM_DOMAIN_WC : I915_GEM_DOMAIN_CPU;
+ igt_while_interruptible(flags & INTERRUPTIBLE)
+ gem_set_domain(fd, obj[0].handle,
+ domain, (flags & WRITE) ? domain : 0);
+
+ if (xor)
+ igt_assert_eq_u32(map[i], i ^ 0xffffffff);
+ else
+ igt_assert_eq_u32(map[i], i);
+
+ if (flags & WRITE)
+ map[i] = 0xdeadbeef;
+ } else if (flags & KERNEL) {
+ uint32_t val;
+
+ igt_while_interruptible(flags & INTERRUPTIBLE)
+ gem_read(fd, obj[0].handle,
+ i*sizeof(uint32_t),
+ &val, sizeof(val));
+
+ if (xor)
+ igt_assert_eq_u32(val, i ^ 0xffffffff);
+ else
+ igt_assert_eq_u32(val, i);
+
+ if (flags & WRITE) {
+ val = 0xdeadbeef;
+ igt_while_interruptible(flags & INTERRUPTIBLE)
+ gem_write(fd, obj[0].handle,
+ i*sizeof(uint32_t),
+ &val, sizeof(val));
+ }
+ } else if (flags & MOVNT) {
+ uint32_t x;
+
+ igt_while_interruptible(flags & INTERRUPTIBLE)
+ gem_sync(fd, obj[0].handle);
+
+ x = movnt(map, i);
+ if (xor)
+ igt_assert_eq_u32(x, i ^ 0xffffffff);
+ else
+ igt_assert_eq_u32(x, i);
+
+ if (flags & WRITE)
+ map[i] = 0xdeadbeef;
+ } else {
+ igt_while_interruptible(flags & INTERRUPTIBLE)
+ gem_sync(fd, obj[0].handle);
+
+ if (!(flags & (BEFORE | COHERENT)) &&
+ !gem_has_llc(fd))
+ igt_clflush_range(&map[i], sizeof(map[i]));
+
+ if (xor)
+ igt_assert_eq_u32(map[i], i ^ 0xffffffff);
+ else
+ igt_assert_eq_u32(map[i], i);
+
+ if (flags & WRITE) {
+ map[i] = 0xdeadbeef;
+ if (!(flags & (COHERENT | BEFORE)))
+ igt_clflush_range(&map[i], sizeof(map[i]));
+ }
+ }
+
+ if (!xor) {
+ xor = true;
+ goto overwrite;
+ }
+ }
+ igt_info("Child[%d]: %lu cycles\n", child, cycles);
+
+ gem_close(fd, obj[2].handle);
+ gem_close(fd, obj[1].handle);
+
+ munmap(map, 4096);
+ gem_close(fd, obj[0].handle);
+ }
+ igt_waitchildren();
+}
+
+enum batch_mode {
+ BATCH_KERNEL,
+ BATCH_USER,
+ BATCH_CPU,
+ BATCH_GTT,
+ BATCH_WC,
+};
+static void batch(int fd, unsigned ring, int nchild, int timeout,
+ enum batch_mode mode, unsigned flags)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+
+ if (flags & CMDPARSER) {
+ int cmdparser = -1;
+ drm_i915_getparam_t gp;
+
+ gp.param = I915_PARAM_CMD_PARSER_VERSION;
+ gp.value = &cmdparser;
+ drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+ igt_require(cmdparser > 0);
+ }
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ igt_fork(child, nchild) {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ unsigned long cycles = 0;
+ uint32_t *ptr;
+ uint32_t *map;
+ int i;
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = gem_create(fd, 4096);
+ obj[0].flags |= EXEC_OBJECT_WRITE;
+
+ gem_set_caching(fd, obj[0].handle, !!(flags & COHERENT));
+ map = gem_mmap__cpu(fd, obj[0].handle, 0, 4096, PROT_WRITE);
+
+ gem_set_domain(fd, obj[0].handle,
+ I915_GEM_DOMAIN_CPU,
+ I915_GEM_DOMAIN_CPU);
+ for (i = 0; i < 1024; i++)
+ map[i] = 0xabcdabcd;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags = ring | (1 << 11) | (1<<12);
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ obj[1].handle = gem_create(fd, 64<<10);
+ gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+ igt_require(__gem_execbuf(fd, &execbuf) == 0);
+
+ obj[1].relocation_count = 1;
+ obj[1].relocs_ptr = to_user_pointer(&reloc);
+
+ switch (mode) {
+ case BATCH_CPU:
+ case BATCH_USER:
+ ptr = gem_mmap__cpu(fd, obj[1].handle, 0, 64<<10,
+ PROT_WRITE);
+ break;
+
+ case BATCH_WC:
+ ptr = gem_mmap__wc(fd, obj[1].handle, 0, 64<<10,
+ PROT_WRITE);
+ break;
+
+ case BATCH_GTT:
+ ptr = gem_mmap__gtt(fd, obj[1].handle, 64<<10,
+ PROT_WRITE);
+ break;
+
+ case BATCH_KERNEL:
+ ptr = mmap(0, 64<<10, PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ break;
+
+ default:
+ igt_assert(!"reachable");
+ ptr = NULL;
+ break;
+ }
+
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+
+ igt_until_timeout(timeout) {
+ execbuf.batch_start_offset = 0;
+ reloc.offset = sizeof(uint32_t);
+ if (gen >= 4 && gen < 8)
+ reloc.offset += sizeof(uint32_t);
+
+ for (i = 0; i < 1024; i++) {
+ uint64_t offset;
+ uint32_t *start = &ptr[execbuf.batch_start_offset/sizeof(*start)];
+ uint32_t *b = start;
+
+ switch (mode) {
+ case BATCH_CPU:
+ gem_set_domain(fd, obj[1].handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ break;
+
+ case BATCH_WC:
+ gem_set_domain(fd, obj[1].handle,
+ I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+ break;
+
+ case BATCH_GTT:
+ gem_set_domain(fd, obj[1].handle,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ break;
+
+ case BATCH_USER:
+ case BATCH_KERNEL:
+ break;
+ }
+
+ reloc.presumed_offset = obj[0].offset;
+ reloc.delta = i * sizeof(uint32_t);
+
+ offset = reloc.presumed_offset + reloc.delta;
+ *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ *b++ = offset;
+ *b++ = offset >> 32;
+ } else if (gen >= 4) {
+ *b++ = 0;
+ *b++ = offset;
+ } else {
+ b[-1] -= 1;
+ *b++ = offset;
+ }
+ *b++ = cycles + i;
+ *b++ = MI_BATCH_BUFFER_END;
+
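+ /*
+ * With the command parser active, pass an explicit batch_len
+ * (rounded up to a qword) so the parser scans just the freshly
+ * written commands.
+ */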
+ if (flags & CMDPARSER) {
+ execbuf.batch_len =
+ (b - start) * sizeof(uint32_t);
+ if (execbuf.batch_len & 4)
+ execbuf.batch_len += 4;
+ }
+
+ switch (mode) {
+ case BATCH_KERNEL:
+ gem_write(fd, obj[1].handle,
+ execbuf.batch_start_offset,
+ start, (b - start) * sizeof(uint32_t));
+ break;
+
+ case BATCH_USER:
+ if (!gem_has_llc(fd))
+ igt_clflush_range(start,
+ (b - start) * sizeof(uint32_t));
+ break;
+
+ case BATCH_CPU:
+ case BATCH_GTT:
+ case BATCH_WC:
+ break;
+ }
+ gem_execbuf(fd, &execbuf);
+
+ execbuf.batch_start_offset += 64;
+ reloc.offset += 64;
+ }
+
+ if (!(flags & COHERENT)) {
+ gem_set_domain(fd, obj[0].handle,
+ I915_GEM_DOMAIN_CPU,
+ I915_GEM_DOMAIN_CPU);
+ } else
+ gem_sync(fd, obj[0].handle);
+ for (i = 0; i < 1024; i++) {
+ igt_assert_eq_u32(map[i], cycles + i);
+ map[i] = 0xabcdabcd ^ cycles;
+ }
+ cycles += 1024;
+
+ if (mode == BATCH_USER)
+ gem_sync(fd, obj[1].handle);
+ }
+ igt_info("Child[%d]: %lu cycles\n", child, cycles);
+
+ munmap(ptr, 64<<10);
+ gem_close(fd, obj[1].handle);
+
+ munmap(map, 4096);
+ gem_close(fd, obj[0].handle);
+ }
+ igt_waitchildren();
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+static const char *yesno(bool x)
+{
+ return x ? "yes" : "no";
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ const struct batch {
+ const char *name;
+ unsigned mode;
+ } batches[] = {
+ { "kernel", BATCH_KERNEL },
+ { "user", BATCH_USER },
+ { "cpu", BATCH_CPU },
+ { "gtt", BATCH_GTT },
+ { "wc", BATCH_WC },
+ { NULL }
+ };
+ const struct mode {
+ const char *name;
+ unsigned flags;
+ } modes[] = {
+ { "ro", BASIC },
+ { "rw", BASIC | WRITE },
+ { "ro-before", BEFORE },
+ { "rw-before", BEFORE | WRITE },
+ { "pro", BASIC | KERNEL },
+ { "prw", BASIC | KERNEL | WRITE },
+ { "set", BASIC | SET_DOMAIN | WRITE },
+ { NULL }
+ };
+ unsigned cpu = x86_64_features();
+ int fd = -1;
+
+ igt_fixture {
+ igt_require(igt_setup_clflush());
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ gem_require_mmap_wc(fd);
+ igt_require(gem_can_store_dword(fd, 0));
+ igt_info("Has LLC? %s\n", yesno(gem_has_llc(fd)));
+
+ if (cpu) {
+ char str[1024];
+
+ igt_info("CPU features: %s\n",
+ igt_x86_features_to_string(cpu, str));
+ }
+
+ igt_fork_hang_detector(fd);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) igt_subtest_group {
+ unsigned ring = e->exec_id | e->flags;
+ unsigned timeout = 5 + 120*!!e->exec_id;
+
+ igt_fixture {
+ gem_require_ring(fd, ring);
+ igt_require(gem_can_store_dword(fd, ring));
+ }
+
+ for (const struct batch *b = batches; b->name; b++) {
+ igt_subtest_f("%sbatch-%s-%s-uc",
+ b == batches && e->exec_id == 0 ? "basic-" : "",
+ b->name,
+ e->name)
+ batch(fd, ring, ncpus, timeout, b->mode, 0);
+ igt_subtest_f("%sbatch-%s-%s-wb",
+ b == batches && e->exec_id == 0 ? "basic-" : "",
+ b->name,
+ e->name)
+ batch(fd, ring, ncpus, timeout, b->mode, COHERENT);
+ igt_subtest_f("%sbatch-%s-%s-cmd",
+ b == batches && e->exec_id == 0 ? "basic-" : "",
+ b->name,
+ e->name)
+ batch(fd, ring, ncpus, timeout, b->mode,
+ COHERENT | CMDPARSER);
+ }
+
+ for (const struct mode *m = modes; m->name; m++) {
+ igt_subtest_f("%suc-%s-%s",
+ (m->flags & BASIC && e->exec_id == 0) ? "basic-" : "",
+ m->name,
+ e->name)
+ run(fd, ring, ncpus, timeout,
+ UNCACHED | m->flags);
+
+ igt_subtest_f("uc-%s-%s-interruptible",
+ m->name,
+ e->name)
+ run(fd, ring, ncpus, timeout,
+ UNCACHED | m->flags | INTERRUPTIBLE);
+
+ igt_subtest_f("%swb-%s-%s",
+ e->exec_id == 0 ? "basic-" : "",
+ m->name,
+ e->name)
+ run(fd, ring, ncpus, timeout,
+ COHERENT | m->flags);
+
+ igt_subtest_f("wb-%s-%s-interruptible",
+ m->name,
+ e->name)
+ run(fd, ring, ncpus, timeout,
+ COHERENT | m->flags | INTERRUPTIBLE);
+
+ igt_subtest_f("wc-%s-%s",
+ m->name,
+ e->name)
+ run(fd, ring, ncpus, timeout,
+ COHERENT | WC | m->flags);
+
+ igt_subtest_f("wc-%s-%s-interruptible",
+ m->name,
+ e->name)
+ run(fd, ring, ncpus, timeout,
+ COHERENT | WC | m->flags | INTERRUPTIBLE);
+
+ igt_subtest_f("stream-%s-%s",
+ m->name,
+ e->name) {
+ igt_require(cpu & SSE4_1);
+ run(fd, ring, ncpus, timeout,
+ MOVNT | COHERENT | WC | m->flags);
+ }
+
+ igt_subtest_f("stream-%s-%s-interruptible",
+ m->name,
+ e->name) {
+ igt_require(cpu & SSE4_1);
+ run(fd, ring, ncpus, timeout,
+ MOVNT | COHERENT | WC | m->flags | INTERRUPTIBLE);
+ }
+ }
+ }
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_exec_gttfill.c b/tests/i915/gem_exec_gttfill.c
new file mode 100644
index 00000000..efd612bb
--- /dev/null
+++ b/tests/i915/gem_exec_gttfill.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+#include "igt_rand.h"
+
+IGT_TEST_DESCRIPTION("Fill the GTT with batches.");
+
+#define BATCH_SIZE (4096<<10)
+
+struct batch {
+ uint32_t handle;
+ void *ptr;
+};
+
+static void xchg_batch(void *array, unsigned int i, unsigned int j)
+{
+ struct batch *batches = array;
+ struct batch tmp;
+
+ tmp = batches[i];
+ batches[i] = batches[j];
+ batches[j] = tmp;
+}
+
+static void submit(int fd, int gen,
+ struct drm_i915_gem_execbuffer2 *eb,
+ struct drm_i915_gem_relocation_entry *reloc,
+ struct batch *batches, unsigned int count)
+{
+ struct drm_i915_gem_exec_object2 obj;
+ uint32_t batch[16];
+ unsigned n;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.relocs_ptr = to_user_pointer(reloc);
+ obj.relocation_count = 2;
+
+ memset(reloc, 0, 2*sizeof(*reloc));
+ reloc[0].offset = eb->batch_start_offset;
+ reloc[0].offset += sizeof(uint32_t);
+ reloc[0].delta = BATCH_SIZE - eb->batch_start_offset - 8;
+ reloc[0].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[1].offset = eb->batch_start_offset;
+ reloc[1].offset += 3*sizeof(uint32_t);
+ reloc[1].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+
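+ /*
+ * Each batch is a self-checking MI_STORE_DWORD_IMM: the relocations
+ * patch both the store address (a qword near the end of the same
+ * object) and the store payload (the object's own address), so the
+ * verify loop in fillgtt() can compare what the GPU wrote against
+ * the address the kernel relocated into the batch.
+ */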
+ n = 0;
+ batch[n] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[n] |= 1 << 21;
+ batch[n]++;
+ batch[++n] = reloc[0].delta;/* lower_32_bits(address) */
+ batch[++n] = 0; /* upper_32_bits(address) */
+ } else if (gen >= 4) {
+ batch[++n] = 0;
+ batch[++n] = reloc[0].delta;/* lower_32_bits(address) */
+ reloc[0].offset += sizeof(uint32_t);
+ } else {
+ batch[n]--;
+ batch[++n] = reloc[0].delta;/* lower_32_bits(address) */
+ reloc[1].offset -= sizeof(uint32_t);
+ }
+ batch[++n] = 0; /* lower_32_bits(value) */
+ batch[++n] = 0; /* upper_32_bits(value) / nop */
+ batch[++n] = MI_BATCH_BUFFER_END;
+
+ eb->buffers_ptr = to_user_pointer(&obj);
+ for (unsigned i = 0; i < count; i++) {
+ obj.handle = batches[i].handle;
+ reloc[0].target_handle = obj.handle;
+ reloc[1].target_handle = obj.handle;
+
+ obj.offset = 0;
+ reloc[0].presumed_offset = obj.offset;
+ reloc[1].presumed_offset = obj.offset;
+
+ memcpy(batches[i].ptr + eb->batch_start_offset,
+ batch, sizeof(batch));
+
+ gem_execbuf(fd, eb);
+ }
+ /* As we have been lying about the write_domain, we need to do a sync */
+ gem_sync(fd, obj.handle);
+}
+
+static void fillgtt(int fd, unsigned ring, int timeout)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_relocation_entry reloc[2];
+ volatile uint64_t *shared;
+ struct batch *batches;
+ unsigned engines[16];
+ unsigned nengine;
+ unsigned engine;
+ uint64_t size;
+ unsigned count;
+
+ shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(shared != MAP_FAILED);
+
+ nengine = 0;
+ if (ring == 0) {
+ for_each_physical_engine(fd, engine) {
+ if (!gem_can_store_dword(fd, engine))
+ continue;
+
+ engines[nengine++] = engine;
+ }
+ } else {
+ gem_require_ring(fd, ring);
+ igt_require(gem_can_store_dword(fd, ring));
+
+ engines[nengine++] = ring;
+ }
+ igt_require(nengine);
+
+ size = gem_aperture_size(fd);
+ if (size > 1ull<<32) /* Limit to 4GiB as we do not use allow-48b */
+ size = 1ull << 32;
+ igt_require(size < (1ull<<32) * BATCH_SIZE);
+
+ count = size / BATCH_SIZE + 1;
+ igt_debug("Using %'d batches to fill %'llu aperture on %d engines\n",
+ count, (long long)size, nengine);
+ intel_require_memory(count, BATCH_SIZE, CHECK_RAM);
+ intel_detect_and_clear_missed_interrupts(fd);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffer_count = 1;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ batches = calloc(count, sizeof(*batches));
+ igt_assert(batches);
+ for (unsigned i = 0; i < count; i++) {
+ batches[i].handle = gem_create(fd, BATCH_SIZE);
+ batches[i].ptr =
+ __gem_mmap__wc(fd, batches[i].handle,
+ 0, BATCH_SIZE, PROT_WRITE);
+ if (!batches[i].ptr) {
+ batches[i].ptr =
+ __gem_mmap__gtt(fd, batches[i].handle,
+ BATCH_SIZE, PROT_WRITE);
+ }
+ igt_require(batches[i].ptr);
+ }
+
+ /* Flush all memory before we start the timer */
+ submit(fd, gen, &execbuf, reloc, batches, count);
+
+ igt_fork(child, nengine) {
+ uint64_t cycles = 0;
+ hars_petruska_f54_1_random_perturb(child);
+ igt_permute_array(batches, count, xchg_batch);
+ execbuf.batch_start_offset = child*64;
+ execbuf.flags |= engines[child];
+ igt_until_timeout(timeout) {
+ submit(fd, gen, &execbuf, reloc, batches, count);
+ for (unsigned i = 0; i < count; i++) {
+ uint64_t offset, delta;
+
+ offset = *(uint64_t *)(batches[i].ptr + reloc[1].offset);
+ delta = *(uint64_t *)(batches[i].ptr + reloc[0].delta);
+ igt_assert_eq_u64(offset, delta);
+ }
+ cycles++;
+ }
+ shared[child] = cycles;
+ igt_info("engine[%d]: %llu cycles\n", child, (long long)cycles);
+ }
+ igt_waitchildren();
+
+ for (unsigned i = 0; i < count; i++) {
+ munmap(batches[i].ptr, BATCH_SIZE);
+ gem_close(fd, batches[i].handle);
+ }
+
+ shared[nengine] = 0;
+ for (unsigned i = 0; i < nengine; i++)
+ shared[nengine] += shared[i];
+ igt_info("Total: %llu cycles\n", (long long)shared[nengine]);
+
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ int device = -1;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ device = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(device);
+ igt_require(gem_can_store_dword(device, 0));
+ igt_fork_hang_detector(device);
+ }
+
+ igt_subtest("basic")
+ fillgtt(device, 0, 1); /* just enough to run a single pass */
+
+ for (e = intel_execution_engines; e->name; e++)
+ igt_subtest_f("%s", e->name)
+ fillgtt(device, e->exec_id | e->flags, 20);
+
+ igt_subtest("all")
+ fillgtt(device, 0, 150);
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ close(device);
+ }
+}
diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c
new file mode 100644
index 00000000..de16322a
--- /dev/null
+++ b/tests/i915/gem_exec_latency.c
@@ -0,0 +1,721 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/signal.h>
+#include <time.h>
+#include <sched.h>
+
+#include "drm.h"
+
+#include "igt_sysfs.h"
+#include "igt_vgem.h"
+#include "igt_dummyload.h"
+#include "igt_stats.h"
+
+#include "i915/gem_ring.h"
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define LOCAL_I915_EXEC_BSD_SHIFT (13)
+#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define ENGINE_FLAGS (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+#define CORK 0x1
+#define PREEMPT 0x2
+
+static unsigned int ring_size;
+
+static void
+poll_ring(int fd, unsigned ring, const char *name)
+{
+ const struct igt_spin_factory opts = {
+ .engine = ring,
+ .flags = IGT_SPIN_POLL_RUN | IGT_SPIN_FAST,
+ };
+ struct timespec tv = {};
+ unsigned long cycles;
+ igt_spin_t *spin[2];
+ uint64_t elapsed;
+ uint32_t cmd;
+
+ gem_require_ring(fd, ring);
+ igt_require(gem_can_store_dword(fd, ring));
+
+ spin[0] = __igt_spin_batch_factory(fd, &opts);
+ igt_assert(spin[0]->running);
+ cmd = *spin[0]->batch;
+
+ spin[1] = __igt_spin_batch_factory(fd, &opts);
+ igt_assert(spin[1]->running);
+ igt_assert(cmd == *spin[1]->batch);
+
+ igt_spin_batch_end(spin[0]);
+ while (!READ_ONCE(*spin[1]->running))
+ ;
+ igt_assert(!gem_bo_busy(fd, spin[0]->handle));
+
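+ /*
+ * Ping-pong between the two spinners: rearm and resubmit the idle
+ * one, terminate the currently running one, then busy-wait until the
+ * new submission reports itself as executing. The mean time per
+ * cycle approximates the execbuf-to-execution latency.
+ */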
+ cycles = 0;
+ while ((elapsed = igt_nsec_elapsed(&tv)) < 2ull << 30) {
+ unsigned int idx = cycles++ & 1;
+
+ *spin[idx]->batch = cmd;
+ *spin[idx]->running = 0;
+ gem_execbuf(fd, &spin[idx]->execbuf);
+
+ igt_spin_batch_end(spin[!idx]);
+ while (!READ_ONCE(*spin[idx]->running))
+ ;
+ }
+
+ igt_info("%s completed %ld cycles: %.3f us\n",
+ name, cycles, elapsed*1e-3/cycles);
+
+ igt_spin_batch_free(fd, spin[1]);
+ igt_spin_batch_free(fd, spin[0]);
+}
+
+#define RCS_TIMESTAMP (0x2000 + 0x358)
+static void latency_on_ring(int fd,
+ unsigned ring, const char *name,
+ unsigned flags)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ const int has_64bit_reloc = gen >= 8;
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ IGT_CORK_HANDLE(c);
+ volatile uint32_t *reg;
+ unsigned repeats = ring_size;
+ uint32_t start, end, *map, *results;
+ uint64_t offset;
+ double gpu_latency;
+ int i, j;
+
+ reg = (volatile uint32_t *)((volatile char *)igt_global_mmio + RCS_TIMESTAMP);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj[1]);
+ execbuf.buffer_count = 2;
+ execbuf.flags = ring;
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT;
+
+ memset(obj, 0, sizeof(obj));
+ obj[1].handle = gem_create(fd, 4096);
+ obj[1].flags = EXEC_OBJECT_WRITE;
+ results = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_READ);
+
+ obj[2].handle = gem_create(fd, 64*1024);
+ map = gem_mmap__wc(fd, obj[2].handle, 0, 64*1024, PROT_WRITE);
+ gem_set_domain(fd, obj[2].handle,
+ I915_GEM_DOMAIN_GTT,
+ I915_GEM_DOMAIN_GTT);
+ map[0] = MI_BATCH_BUFFER_END;
+ gem_execbuf(fd, &execbuf);
+
+ memset(&reloc, 0, sizeof(reloc));
+ obj[2].relocation_count = 1;
+ obj[2].relocs_ptr = to_user_pointer(&reloc);
+
+ gem_set_domain(fd, obj[2].handle,
+ I915_GEM_DOMAIN_GTT,
+ I915_GEM_DOMAIN_GTT);
+
+ reloc.target_handle = flags & CORK ? 1 : 0;
+ reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.presumed_offset = obj[1].offset;
+
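+ /*
+ * Prebuild one small batch per repeat, each using MI_STORE_REGISTER_MEM
+ * to copy the ring-local RCS timestamp into its own slot of the results
+ * buffer. The difference between CPU mmio reads of the same register
+ * around the submission loop gives the dispatch latency; the spread of
+ * the stored timestamps gives the GPU execution latency.
+ */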
+ for (j = 0; j < repeats; j++) {
+ execbuf.batch_start_offset = 64 * j;
+ reloc.offset =
+ execbuf.batch_start_offset + sizeof(uint32_t);
+ reloc.delta = sizeof(uint32_t) * j;
+
+ offset = reloc.presumed_offset;
+ offset += reloc.delta;
+
+ i = 16 * j;
+ /* MI_STORE_REG_MEM */
+ map[i++] = 0x24 << 23 | 1;
+ if (has_64bit_reloc)
+ map[i-1]++;
+ map[i++] = RCS_TIMESTAMP; /* ring local! */
+ map[i++] = offset;
+ if (has_64bit_reloc)
+ map[i++] = offset >> 32;
+ map[i++] = MI_BATCH_BUFFER_END;
+ }
+
+ if (flags & CORK) {
+ obj[0].handle = igt_cork_plug(&c, fd);
+ execbuf.buffers_ptr = to_user_pointer(&obj[0]);
+ execbuf.buffer_count = 3;
+ }
+
+ start = *reg;
+ for (j = 0; j < repeats; j++) {
+ uint64_t presumed_offset = reloc.presumed_offset;
+
+ execbuf.batch_start_offset = 64 * j;
+ reloc.offset =
+ execbuf.batch_start_offset + sizeof(uint32_t);
+ reloc.delta = sizeof(uint32_t) * j;
+
+ gem_execbuf(fd, &execbuf);
+ igt_assert(reloc.presumed_offset == presumed_offset);
+ }
+ end = *reg;
+ igt_assert(reloc.presumed_offset == obj[1].offset);
+
+ if (flags & CORK)
+ igt_cork_unplug(&c);
+
+ gem_set_domain(fd, obj[1].handle, I915_GEM_DOMAIN_GTT, 0);
+ gpu_latency = (results[repeats-1] - results[0]) / (double)(repeats-1);
+
+ gem_set_domain(fd, obj[2].handle,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+ execbuf.batch_start_offset = 0;
+ for (j = 0; j < repeats - 1; j++) {
+ offset = obj[2].offset;
+ offset += 64 * (j + 1);
+
+ i = 16 * j + (has_64bit_reloc ? 4 : 3);
+ map[i] = MI_BATCH_BUFFER_START;
+ if (gen >= 8) {
+ map[i] |= 1 << 8 | 1;
+ map[i + 1] = offset;
+ map[i + 2] = offset >> 32;
+ } else if (gen >= 6) {
+ map[i] |= 1 << 8;
+ map[i + 1] = offset;
+ } else {
+ map[i] |= 2 << 6;
+ map[i + 1] = offset;
+ if (gen < 4)
+ map[i] |= 1;
+ }
+ }
+ offset = obj[2].offset;
+ gem_execbuf(fd, &execbuf);
+ igt_assert(offset == obj[2].offset);
+
+ gem_set_domain(fd, obj[1].handle, I915_GEM_DOMAIN_GTT, 0);
+ igt_info("%s: dispatch latency: %.2f, execution latency: %.2f (target %.2f)\n",
+ name,
+ (end - start) / (double)repeats,
+ gpu_latency, (results[repeats - 1] - results[0]) / (double)(repeats - 1));
+
+ munmap(map, 64*1024);
+ munmap(results, 4096);
+ if (flags & CORK)
+ gem_close(fd, obj[0].handle);
+ gem_close(fd, obj[1].handle);
+ gem_close(fd, obj[2].handle);
+}
+
+static void latency_from_ring(int fd,
+ unsigned ring, const char *name,
+ unsigned flags)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ const int has_64bit_reloc = gen >= 8;
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ const unsigned int repeats = ring_size / 2;
+ unsigned int other;
+ uint32_t *map, *results;
+ uint32_t ctx[2] = {};
+ int i, j;
+
+ if (flags & PREEMPT) {
+ ctx[0] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[0], -1023);
+
+ ctx[1] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[1], 1023);
+ }
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj[1]);
+ execbuf.buffer_count = 2;
+ execbuf.flags = ring;
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT;
+ execbuf.rsvd1 = ctx[1];
+
+ memset(obj, 0, sizeof(obj));
+ obj[1].handle = gem_create(fd, 4096);
+ obj[1].flags = EXEC_OBJECT_WRITE;
+ results = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_READ);
+
+ obj[2].handle = gem_create(fd, 64*1024);
+ map = gem_mmap__wc(fd, obj[2].handle, 0, 64*1024, PROT_WRITE);
+ gem_set_domain(fd, obj[2].handle,
+ I915_GEM_DOMAIN_GTT,
+ I915_GEM_DOMAIN_GTT);
+ map[0] = MI_BATCH_BUFFER_END;
+ gem_execbuf(fd, &execbuf);
+
+ memset(&reloc, 0, sizeof(reloc));
+ obj[2].relocation_count = 1;
+ obj[2].relocs_ptr = to_user_pointer(&reloc);
+
+ gem_set_domain(fd, obj[2].handle,
+ I915_GEM_DOMAIN_GTT,
+ I915_GEM_DOMAIN_GTT);
+
+ reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.presumed_offset = obj[1].offset;
+ reloc.target_handle = flags & CORK ? 1 : 0;
+
+ for_each_physical_engine(fd, other) {
+ igt_spin_t *spin = NULL;
+ IGT_CORK_HANDLE(c);
+
+ gem_set_domain(fd, obj[2].handle,
+ I915_GEM_DOMAIN_GTT,
+ I915_GEM_DOMAIN_GTT);
+
+ if (flags & PREEMPT)
+ spin = __igt_spin_batch_new(fd,
+ .ctx = ctx[0],
+ .engine = ring);
+
+ if (flags & CORK) {
+ obj[0].handle = igt_cork_plug(&c, fd);
+ execbuf.buffers_ptr = to_user_pointer(&obj[0]);
+ execbuf.buffer_count = 3;
+ }
+
+ for (j = 0; j < repeats; j++) {
+ uint64_t offset;
+
+ execbuf.flags &= ~ENGINE_FLAGS;
+ execbuf.flags |= ring;
+
+ execbuf.batch_start_offset = 64 * j;
+ reloc.offset =
+ execbuf.batch_start_offset + sizeof(uint32_t);
+ reloc.delta = sizeof(uint32_t) * j;
+
+ reloc.presumed_offset = obj[1].offset;
+ offset = reloc.presumed_offset;
+ offset += reloc.delta;
+
+ i = 16 * j;
+ /* MI_STORE_REG_MEM */
+ map[i++] = 0x24 << 23 | 1;
+ if (has_64bit_reloc)
+ map[i-1]++;
+ map[i++] = RCS_TIMESTAMP; /* ring local! */
+ map[i++] = offset;
+ if (has_64bit_reloc)
+ map[i++] = offset >> 32;
+ map[i++] = MI_BATCH_BUFFER_END;
+
+ gem_execbuf(fd, &execbuf);
+
+ execbuf.flags &= ~ENGINE_FLAGS;
+ execbuf.flags |= other;
+
+ execbuf.batch_start_offset = 64 * (j + repeats);
+ reloc.offset =
+ execbuf.batch_start_offset + sizeof(uint32_t);
+ reloc.delta = sizeof(uint32_t) * (j + repeats);
+
+ reloc.presumed_offset = obj[1].offset;
+ offset = reloc.presumed_offset;
+ offset += reloc.delta;
+
+ i = 16 * (j + repeats);
+ /* MI_STORE_REG_MEM */
+ map[i++] = 0x24 << 23 | 1;
+ if (has_64bit_reloc)
+ map[i-1]++;
+ map[i++] = RCS_TIMESTAMP; /* ring local! */
+ map[i++] = offset;
+ if (has_64bit_reloc)
+ map[i++] = offset >> 32;
+ map[i++] = MI_BATCH_BUFFER_END;
+
+ gem_execbuf(fd, &execbuf);
+ }
+
+ if (flags & CORK)
+ igt_cork_unplug(&c);
+ gem_set_domain(fd, obj[1].handle,
+ I915_GEM_DOMAIN_GTT,
+ I915_GEM_DOMAIN_GTT);
+ igt_spin_batch_free(fd, spin);
+
+ igt_info("%s-%s delay: %.2f\n",
+ name, e__->name,
+ (results[2*repeats-1] - results[0]) / (double)repeats);
+ }
+
+ munmap(map, 64*1024);
+ munmap(results, 4096);
+
+ if (flags & CORK)
+ gem_close(fd, obj[0].handle);
+ gem_close(fd, obj[1].handle);
+ gem_close(fd, obj[2].handle);
+
+ if (flags & PREEMPT) {
+ gem_context_destroy(fd, ctx[1]);
+ gem_context_destroy(fd, ctx[0]);
+ }
+}
+
+static void __rearm_spin_batch(igt_spin_t *spin)
+{
+ const uint32_t mi_arb_chk = 0x5 << 23;
+
+ *spin->batch = mi_arb_chk;
+ *spin->running = 0;
+ __sync_synchronize();
+}
+
+static void
+__submit_spin_batch(int fd, igt_spin_t *spin, unsigned int flags)
+{
+ struct drm_i915_gem_execbuffer2 eb = spin->execbuf;
+
+ eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
+ eb.flags |= flags | I915_EXEC_NO_RELOC;
+
+ gem_execbuf(fd, &eb);
+}
+
+struct rt_pkt {
+ struct igt_mean mean;
+ double min, max;
+};
+
+static bool __spin_wait(int fd, igt_spin_t *spin)
+{
+ while (!READ_ONCE(*spin->running)) {
+ if (!gem_bo_busy(fd, spin->handle))
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Test whether RT thread which hogs the CPU a lot can submit work with
+ * reasonable latency.
+ */
+static void
+rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned int flags)
+#define RTIDLE 0x1
+{
+ const char *passname[] = {
+ "warmup",
+ "normal",
+ "rt[0]",
+ "rt[1]",
+ "rt[2]",
+ "rt[3]",
+ "rt[4]",
+ "rt[5]",
+ "rt[6]",
+ };
+#define NPASS ARRAY_SIZE(passname)
+#define MMAP_SZ (64 << 10)
+ const struct igt_spin_factory opts = {
+ .engine = engine,
+ .flags = IGT_SPIN_POLL_RUN | IGT_SPIN_FAST,
+ };
+ struct rt_pkt *results;
+ unsigned int engines[16];
+ const char *names[16];
+ unsigned int nengine;
+ int ret;
+
+ igt_assert(ARRAY_SIZE(engines) * NPASS * sizeof(*results) <= MMAP_SZ);
+ results = mmap(NULL, MMAP_SZ, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(results != MAP_FAILED);
+
+ nengine = 0;
+ if (engine == ALL_ENGINES) {
+ for_each_physical_engine(fd, engine) {
+ if (!gem_can_store_dword(fd, engine))
+ continue;
+
+ engines[nengine] = engine;
+ names[nengine] = e__->name;
+ nengine++;
+ }
+ igt_require(nengine > 1);
+ } else {
+ igt_require(gem_can_store_dword(fd, engine));
+ engines[nengine] = engine;
+ names[nengine] = name;
+ nengine++;
+ }
+
+ gem_quiescent_gpu(fd);
+
+ igt_fork(child, nengine) {
+ unsigned int pass = 0; /* Three phases: warmup, normal, rt. */
+
+ engine = engines[child];
+ do {
+ struct igt_mean mean;
+ double min = HUGE_VAL;
+ double max = -HUGE_VAL;
+ igt_spin_t *spin;
+
+ igt_mean_init(&mean);
+
+ if (pass == 2) {
+ struct sched_param rt =
+ { .sched_priority = 99 };
+
+ ret = sched_setscheduler(0,
+ SCHED_FIFO | SCHED_RESET_ON_FORK,
+ &rt);
+ if (ret) {
+ igt_warn("Failed to set scheduling policy!\n");
+ break;
+ }
+ }
+
+ usleep(250);
+
+ spin = __igt_spin_batch_factory(fd, &opts);
+ if (!spin) {
+ igt_warn("Failed to create spinner! (%s)\n",
+ passname[pass]);
+ break;
+ }
+ igt_spin_busywait_until_running(spin);
+
+ igt_until_timeout(pass > 0 ? 5 : 2) {
+ struct timespec ts = { };
+ double t;
+
+ igt_spin_batch_end(spin);
+ gem_sync(fd, spin->handle);
+ if (flags & RTIDLE)
+ igt_drop_caches_set(fd, DROP_IDLE);
+
+ /*
+ * If we are oversubscribed (more RT hogs than
+ * cpus) give the others a chance to run;
+ * otherwise, they will interrupt us in the
+ * middle of the measurement.
+ */
+ if (nengine > 1)
+ usleep(10*nengine);
+
+ __rearm_spin_batch(spin);
+
+ igt_nsec_elapsed(&ts);
+ __submit_spin_batch(fd, spin, engine);
+ if (!__spin_wait(fd, spin)) {
+ igt_warn("Wait timeout! (%s)\n",
+ passname[pass]);
+ break;
+ }
+
+ t = igt_nsec_elapsed(&ts) * 1e-9;
+ if (t > max)
+ max = t;
+ if (t < min)
+ min = t;
+
+ igt_mean_add(&mean, t);
+ }
+
+ igt_spin_batch_free(fd, spin);
+
+ igt_info("%8s %10s: mean=%.2fus stddev=%.3fus [%.2fus, %.2fus] (n=%lu)\n",
+ names[child],
+ passname[pass],
+ igt_mean_get(&mean) * 1e6,
+ sqrt(igt_mean_get_variance(&mean)) * 1e6,
+ min * 1e6, max * 1e6,
+ mean.count);
+
+ results[NPASS * child + pass].mean = mean;
+ results[NPASS * child + pass].min = min;
+ results[NPASS * child + pass].max = max;
+ } while (++pass < NPASS);
+ }
+
+ igt_waitchildren();
+
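+ /*
+ * Compare the RT passes against the single "normal" pass: the median
+ * of the RT-pass means must stay within 2x of the normal mean, and
+ * the averaged RT variance within 10x of the normal variance, or the
+ * submission latency is deemed to have regressed under an RT CPU hog.
+ */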
+ for (unsigned int child = 0; child < nengine; child++) {
+ struct rt_pkt normal = results[NPASS * child + 1];
+ igt_stats_t stats;
+ double variance = 0;
+
+ igt_stats_init_with_size(&stats, NPASS);
+
+ for (unsigned int pass = 2; pass < NPASS; pass++) {
+ struct rt_pkt *rt = &results[NPASS * child + pass];
+
+ igt_assert(rt->max);
+
+ igt_stats_push_float(&stats, igt_mean_get(&rt->mean));
+ variance += igt_mean_get_variance(&rt->mean);
+ }
+ variance /= NPASS - 2;
+
+ igt_info("%8s: normal latency=%.2f±%.3fus, rt latency=%.2f±%.3fus\n",
+ names[child],
+ igt_mean_get(&normal.mean) * 1e6,
+ sqrt(igt_mean_get_variance(&normal.mean)) * 1e6,
+ igt_stats_get_median(&stats) * 1e6,
+ sqrt(variance) * 1e6);
+
+ igt_assert(igt_stats_get_median(&stats) <
+ igt_mean_get(&normal.mean) * 2);
+
+ /* The system is noisy; be conservative when declaring fail. */
+ igt_assert(variance < igt_mean_get_variance(&normal.mean) * 10);
+ }
+
+ munmap(results, MMAP_SZ);
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ int device = -1;
+
+ igt_fixture {
+ device = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(device);
+ gem_require_mmap_wc(device);
+
+ gem_submission_print_method(device);
+
+ ring_size = gem_measure_ring_inflight(device, ALL_ENGINES, 0);
+ igt_info("Ring size: %d batches\n", ring_size);
+ igt_require(ring_size > 8);
+ ring_size -= 8; /* leave some spare */
+ if (ring_size > 1024)
+ ring_size = 1024;
+
+ intel_register_access_init(intel_get_pci_device(), false, device);
+ }
+
+ igt_subtest("all-rtidle-submit")
+ rthog_latency_on_ring(device, ALL_ENGINES, "all", RTIDLE);
+
+ igt_subtest("all-rthog-submit")
+ rthog_latency_on_ring(device, ALL_ENGINES, "all", 0);
+
+ igt_subtest_group {
+ igt_fixture
+ igt_require(intel_gen(intel_get_drm_devid(device)) >= 7);
+
+ for (e = intel_execution_engines; e->name; e++) {
+ if (e->exec_id == 0)
+ continue;
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(gem_ring_has_physical_engine(device, e->exec_id | e->flags));
+ }
+
+ igt_subtest_f("%s-dispatch", e->name)
+ latency_on_ring(device,
+ e->exec_id | e->flags,
+ e->name, 0);
+
+ igt_subtest_f("%s-poll", e->name)
+ poll_ring(device,
+ e->exec_id | e->flags,
+ e->name);
+
+ igt_subtest_f("%s-rtidle-submit", e->name)
+ rthog_latency_on_ring(device,
+ e->exec_id |
+ e->flags,
+ e->name,
+ RTIDLE);
+
+ igt_subtest_f("%s-rthog-submit", e->name)
+ rthog_latency_on_ring(device,
+ e->exec_id |
+ e->flags,
+ e->name,
+ 0);
+
+ igt_subtest_f("%s-dispatch-queued", e->name)
+ latency_on_ring(device,
+ e->exec_id | e->flags,
+ e->name, CORK);
+
+ igt_subtest_f("%s-synchronisation", e->name)
+ latency_from_ring(device,
+ e->exec_id | e->flags,
+ e->name, 0);
+
+ igt_subtest_f("%s-synchronisation-queued", e->name)
+ latency_from_ring(device,
+ e->exec_id | e->flags,
+ e->name, CORK);
+
+ igt_subtest_group {
+ igt_fixture {
+ gem_require_contexts(device);
+ igt_require(gem_scheduler_has_preemption(device));
+ }
+
+ igt_subtest_f("%s-preemption", e->name)
+ latency_from_ring(device,
+ e->exec_id | e->flags,
+ e->name, PREEMPT);
+ }
+ }
+ }
+ }
+
+ igt_fixture {
+ close(device);
+ }
+}
diff --git a/tests/i915/gem_exec_lut_handle.c b/tests/i915/gem_exec_lut_handle.c
new file mode 100644
index 00000000..98e6ae5a
--- /dev/null
+++ b/tests/i915/gem_exec_lut_handle.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/* Exercises the basic execbuffer using the handle LUT interface */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION("Exercises the basic execbuffer using the handle LUT"
+ " interface.");
+
+#define BATCH_SIZE (1024*1024)
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define MAX_NUM_EXEC 2048
+#define MAX_NUM_RELOC 4096
+
+#define SKIP_RELOC 0x1
+#define NO_RELOC 0x2
+#define CYCLE_BATCH 0x4
+#define FAULT 0x8
+
+int target[MAX_NUM_RELOC];
+struct drm_i915_gem_exec_object2 gem_exec[MAX_NUM_EXEC+1];
+struct drm_i915_gem_relocation_entry mem_reloc[MAX_NUM_RELOC];
+
+static uint32_t state = 0x12345678;
+
+static uint32_t
+hars_petruska_f54_1_random (void)
+{
+#define rol(x,k) ((x << k) | (x >> (32-k)))
+ return state = (state ^ rol (state, 5) ^ rol (state, 24)) + 0x37798849;
+#undef rol
+}
+
+static int has_exec_lut(int fd)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer((gem_exec + MAX_NUM_EXEC));
+ execbuf.buffer_count = 1;
+ execbuf.flags = LOCAL_I915_EXEC_HANDLE_LUT;
+
+ return __gem_execbuf(fd, &execbuf) == 0;
+}
+
+#define ELAPSED(a,b) (1e6*((b)->tv_sec - (a)->tv_sec) + ((b)->tv_usec - (a)->tv_usec))
+igt_simple_main
+{
+ uint32_t batch[2] = {MI_BATCH_BUFFER_END};
+ uint32_t cycle[16];
+ int fd, n, m, count, c = 0;
+ const struct {
+ const char *name;
+ unsigned int flags;
+ } pass[] = {
+ { .name = "relocation", .flags = 0 },
+ { .name = "cycle-relocation", .flags = CYCLE_BATCH },
+ { .name = "fault-relocation", .flags = FAULT },
+ { .name = "skip-relocs", .flags = SKIP_RELOC },
+ { .name = "no-relocs", .flags = SKIP_RELOC | NO_RELOC },
+ { .name = NULL },
+ }, *p;
+ struct drm_i915_gem_relocation_entry *reloc;
+ uint32_t reloc_handle;
+ int size;
+
+ igt_skip_on_simulation();
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ memset(gem_exec, 0, sizeof(gem_exec));
+ for (n = 0; n < MAX_NUM_EXEC; n++)
+ gem_exec[n].handle = gem_create(fd, 4096);
+
+ for (n = 0; n < 16; n++) {
+ cycle[n] = gem_create(fd, 4096);
+ gem_write(fd, cycle[n], 0, batch, sizeof(batch));
+ }
+ gem_exec[MAX_NUM_EXEC].handle = cycle[0];
+
+ memset(mem_reloc, 0, sizeof(mem_reloc));
+ for (n = 0; n < MAX_NUM_RELOC; n++) {
+ mem_reloc[n].offset = 1024;
+ mem_reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
+ }
+
+ size = ALIGN(sizeof(mem_reloc), 4096);
+ reloc_handle = gem_create(fd, size);
+ reloc = gem_mmap__cpu(fd, reloc_handle, 0, size, PROT_READ | PROT_WRITE);
+ for (n = 0; n < MAX_NUM_RELOC; n++) {
+ reloc[n].offset = 1024;
+ reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
+ }
+ munmap(reloc, size);
+
+ igt_require(has_exec_lut(fd));
+
+ for (p = pass; p->name != NULL; p++) {
+ if (p->flags & FAULT)
+ igt_disable_prefault();
+ for (n = 1; n <= MAX_NUM_EXEC; n *= 2) {
+ double elapsed[16][2];
+ double s_x, s_y, s_xx, s_xy;
+ double A, B;
+ int i, j;
+
+ for (i = 0, m = 1; m <= MAX_NUM_RELOC; m *= 2, i++) {
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 *objects;
+ struct timeval start, end;
+
+ if (p->flags & FAULT)
+ reloc = __gem_mmap__cpu(fd, reloc_handle, 0, size, PROT_READ | PROT_WRITE);
+ else
+ reloc = mem_reloc;
+
+ gem_exec[MAX_NUM_EXEC].relocation_count = m;
+ gem_exec[MAX_NUM_EXEC].relocs_ptr = to_user_pointer(reloc);
+ objects = gem_exec + MAX_NUM_EXEC - n;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(objects);
+ execbuf.buffer_count = n + 1;
+ execbuf.flags = LOCAL_I915_EXEC_HANDLE_LUT;
+ if (p->flags & NO_RELOC)
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+
+ for (j = 0; j < m; j++) {
+ target[j] = hars_petruska_f54_1_random() % n;
+ reloc[j].target_handle = target[j];
+ reloc[j].presumed_offset = -1;
+ }
+
+ gem_execbuf(fd, &execbuf);
+ gettimeofday(&start, NULL);
+ for (count = 0; count < 1000; count++) {
+ if ((p->flags & SKIP_RELOC) == 0) {
+ for (j = 0; j < m; j++)
+ reloc[j].presumed_offset = -1;
+ if (p->flags & CYCLE_BATCH) {
+ c = (c + 1) % 16;
+ gem_exec[MAX_NUM_EXEC].handle = cycle[c];
+ }
+ }
+ if (p->flags & FAULT) {
+ munmap(reloc, size);
+ reloc = __gem_mmap__cpu(fd, reloc_handle, 0, size, PROT_READ | PROT_WRITE);
+ gem_exec[MAX_NUM_EXEC].relocs_ptr = to_user_pointer(reloc);
+ }
+ gem_execbuf(fd, &execbuf);
+ }
+ gettimeofday(&end, NULL);
+ c = 16;
+ do
+ gem_sync(fd, cycle[--c]);
+ while (c != 0);
+ gem_exec[MAX_NUM_EXEC].handle = cycle[c];
+ elapsed[i][1] = ELAPSED(&start, &end);
+
+ execbuf.flags &= ~LOCAL_I915_EXEC_HANDLE_LUT;
+ for (j = 0; j < m; j++)
+ reloc[j].target_handle = objects[target[j]].handle;
+
+ gem_execbuf(fd, &execbuf);
+ gettimeofday(&start, NULL);
+ for (count = 0; count < 1000; count++) {
+ if ((p->flags & SKIP_RELOC) == 0) {
+ for (j = 0; j < m; j++)
+ reloc[j].presumed_offset = -1;
+ if (p->flags & CYCLE_BATCH) {
+ c = (c + 1) % 16;
+ gem_exec[MAX_NUM_EXEC].handle = cycle[c];
+ }
+ }
+ if (p->flags & FAULT) {
+ munmap(reloc, size);
+ reloc = __gem_mmap__cpu(fd, reloc_handle, 0, size, PROT_READ | PROT_WRITE);
+ gem_exec[MAX_NUM_EXEC].relocs_ptr = to_user_pointer(reloc);
+ }
+ gem_execbuf(fd, &execbuf);
+ }
+ gettimeofday(&end, NULL);
+ c = 16;
+ do
+ gem_sync(fd, cycle[--c]);
+ while (c != 0);
+ gem_exec[MAX_NUM_EXEC].handle = cycle[c];
+ elapsed[i][0] = ELAPSED(&start, &end);
+
+ if (p->flags & FAULT)
+ munmap(reloc, size);
+ }
+
+ igt_info("%s: buffers=%4d:", p->name, n);
+
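+ /*
+ * Least-squares fit of elapsed time against relocation count
+ * (k = 1 << j): A estimates the fixed cost per execbuf and B the
+ * marginal cost per relocation, computed once for the old per-handle
+ * lookup path and once for the HANDLE_LUT path. Since each sample is
+ * the total for 1000 calls in microseconds, the fitted numbers read
+ * directly as nanoseconds per call.
+ */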
+ s_x = s_y = s_xx = s_xy = 0;
+ for (j = 0; j < i; j++) {
+ int k = 1 << j;
+ s_x += k;
+ s_y += elapsed[j][0];
+ s_xx += k * k;
+ s_xy += k * elapsed[j][0];
+ }
+ B = (s_xy - s_x * s_y / j) / (s_xx - s_x * s_x / j);
+ A = s_y / j - B * s_x / j;
+ igt_info(" old=%7.0f + %.1f*reloc,", A, B);
+
+ s_x = s_y = s_xx = s_xy = 0;
+ for (j = 0; j < i; j++) {
+ int k = 1 << j;
+ s_x += k;
+ s_y += elapsed[j][1];
+ s_xx += k * k;
+ s_xy += k * elapsed[j][1];
+ }
+ B = (s_xy - s_x * s_y / j) / (s_xx - s_x * s_x / j);
+ A = s_y / j - B * s_x / j;
+ igt_info(" lut=%7.0f + %.1f*reloc (ns)", A, B);
+
+ igt_info("\n");
+ }
+ if (p->flags & FAULT)
+ igt_enable_prefault();
+ }
+}
diff --git a/tests/i915/gem_exec_nop.c b/tests/i915/gem_exec_nop.c
new file mode 100644
index 00000000..59a08ad0
--- /dev/null
+++ b/tests/i915/gem_exec_nop.c
@@ -0,0 +1,829 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "igt.h"
+#include "igt_device.h"
+#include "igt_rand.h"
+#include "igt_sysfs.h"
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/time.h>
+#include <time.h>
+#include "drm.h"
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define LOCAL_I915_EXEC_BSD_SHIFT (13)
+#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define ENGINE_FLAGS (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+#define MAX_PRIO LOCAL_I915_CONTEXT_MAX_USER_PRIORITY
+#define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY
+
+#define FORKED 1
+#define CHAINED 2
+#define CONTEXT 4
+
+static double elapsed(const struct timespec *start, const struct timespec *end)
+{
+ return ((end->tv_sec - start->tv_sec) +
+ (end->tv_nsec - start->tv_nsec)*1e-9);
+}
+
+static double nop_on_ring(int fd, uint32_t handle, unsigned ring_id,
+ int timeout, unsigned long *out)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ struct timespec start, now;
+ unsigned long count;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = handle;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = ring_id;
+ execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+ if (__gem_execbuf(fd, &execbuf)) {
+ execbuf.flags = ring_id;
+ gem_execbuf(fd, &execbuf);
+ }
+ intel_detect_and_clear_missed_interrupts(fd);
+
+ count = 0;
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ do {
+ for (int loop = 0; loop < 1024; loop++)
+ gem_execbuf(fd, &execbuf);
+
+ count += 1024;
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ } while (elapsed(&start, &now) < timeout);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+
+ *out = count;
+ return elapsed(&start, &now);
+}
+
+static void poll_ring(int fd, unsigned ring, const char *name, int timeout)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ const uint32_t MI_ARB_CHK = 0x5 << 23;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_relocation_entry reloc[4], *r;
+ uint32_t *bbe[2], *state, *batch;
+ unsigned engines[16], nengine, flags;
+ struct timespec tv = {};
+ unsigned long cycles;
+ uint64_t elapsed;
+
+ flags = I915_EXEC_NO_RELOC;
+ if (gen == 4 || gen == 5)
+ flags |= I915_EXEC_SECURE;
+
+ nengine = 0;
+ if (ring == ALL_ENGINES) {
+ for_each_physical_engine(fd, ring) {
+ if (!gem_can_store_dword(fd, ring))
+ continue;
+
+ engines[nengine++] = ring;
+ }
+ } else {
+ gem_require_ring(fd, ring);
+ igt_require(gem_can_store_dword(fd, ring));
+ engines[nengine++] = ring;
+ }
+ igt_require(nengine);
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ obj.relocs_ptr = to_user_pointer(reloc);
+ obj.relocation_count = ARRAY_SIZE(reloc);
+
+ r = memset(reloc, 0, sizeof(reloc));
+ batch = gem_mmap__wc(fd, obj.handle, 0, 4096, PROT_WRITE);
+
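+ /* Build two self-resubmitting batches in the same page, at offsets 0
+ * and 128. Each stores its index into the last dword of the page and
+ * then spins in an MI_ARB_CHK / MI_BATCH_BUFFER_START loop until the
+ * CPU overwrites the arbitration point with MI_BATCH_BUFFER_END.
+ */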
+ for (unsigned int start_offset = 0;
+ start_offset <= 128;
+ start_offset += 128) {
+ uint32_t *b = batch + start_offset / sizeof(*batch);
+
+ r->target_handle = obj.handle;
+ r->offset = (b - batch + 1) * sizeof(uint32_t);
+ r->delta = 4092;
+ r->read_domains = I915_GEM_DOMAIN_RENDER;
+
+ *b = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ *++b = r->delta;
+ *++b = 0;
+ } else if (gen >= 4) {
+ r->offset += sizeof(uint32_t);
+ *++b = 0;
+ *++b = r->delta;
+ } else {
+ *b -= 1;
+ *++b = r->delta;
+ }
+ *++b = start_offset != 0;
+ r++;
+
+ b = batch + (start_offset + 64) / sizeof(*batch);
+ bbe[start_offset != 0] = b;
+ *b++ = MI_ARB_CHK;
+
+ r->target_handle = obj.handle;
+ r->offset = (b - batch + 1) * sizeof(uint32_t);
+ r->read_domains = I915_GEM_DOMAIN_COMMAND;
+ r->delta = start_offset + 64;
+ if (gen >= 8) {
+ *b++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
+ *b++ = r->delta;
+ *b++ = 0;
+ } else if (gen >= 6) {
+ *b++ = MI_BATCH_BUFFER_START | 1 << 8;
+ *b++ = r->delta;
+ } else {
+ *b++ = MI_BATCH_BUFFER_START | 2 << 6;
+ if (gen < 4)
+ r->delta |= 1;
+ *b++ = r->delta;
+ }
+ r++;
+ }
+ igt_assert(r == reloc + ARRAY_SIZE(reloc));
+ state = batch + 1023;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = engines[0];
+
+ cycles = 0;
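+ /* Ping-pong between the two batches: re-arm the next batch's spin
+ * loop, submit it, terminate the batch currently spinning on the GPU,
+ * then busy-wait for the new batch to report in by writing its index
+ * to the state dword. Each round-trip approximates the latency of
+ * switching between back-to-back batches on the engine.
+ */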
+ do {
+ unsigned int idx = ++cycles & 1;
+
+ *bbe[idx] = MI_ARB_CHK;
+ execbuf.batch_start_offset =
+ (bbe[idx] - batch) * sizeof(*batch) - 64;
+
+ execbuf.flags = engines[cycles % nengine] | flags;
+ gem_execbuf(fd, &execbuf);
+
+ *bbe[!idx] = MI_BATCH_BUFFER_END;
+ __sync_synchronize();
+
+ while (READ_ONCE(*state) != idx)
+ ;
+ } while ((elapsed = igt_nsec_elapsed(&tv)) >> 30 < timeout);
+ *bbe[cycles & 1] = MI_BATCH_BUFFER_END;
+ gem_sync(fd, obj.handle);
+
+ igt_info("%s completed %ld cycles: %.3f us\n",
+ name, cycles, elapsed*1e-3/cycles);
+
+ munmap(batch, 4096);
+ gem_close(fd, obj.handle);
+}
+
+static void single(int fd, uint32_t handle,
+ unsigned ring_id, const char *ring_name)
+{
+ double time;
+ unsigned long count;
+
+ gem_require_ring(fd, ring_id);
+
+ time = nop_on_ring(fd, handle, ring_id, 20, &count);
+ igt_info("%s: %'lu cycles: %.3fus\n",
+ ring_name, count, time*1e6 / count);
+}
+
+static double
+stable_nop_on_ring(int fd, uint32_t handle, unsigned int engine,
+ int timeout, int reps)
+{
+ igt_stats_t s;
+ double n;
+
+ igt_assert(reps >= 5);
+
+ igt_stats_init_with_size(&s, reps);
+ s.is_float = true;
+
+ while (reps--) {
+ unsigned long count;
+ double time;
+
+ time = nop_on_ring(fd, handle, engine, timeout, &count);
+ igt_stats_push_float(&s, time / count);
+ }
+
+ n = igt_stats_get_median(&s);
+ igt_stats_fini(&s);
+
+ return n;
+}
+
+#define assert_within_epsilon(x, ref, tolerance) \
+ igt_assert_f((x) <= (1.0 + tolerance) * ref && \
+ (x) >= (1.0 - tolerance) * ref, \
+ "'%s' != '%s' (%f not within %f%% tolerance of %f)\n",\
+ #x, #ref, x, tolerance * 100.0, ref)
+
+static void headless(int fd, uint32_t handle)
+{
+ unsigned int nr_connected = 0;
+ drmModeConnector *connector;
+ drmModeRes *res;
+ double n_display, n_headless;
+
+ res = drmModeGetResources(fd);
+ igt_require(res);
+
+ /* require at least one connected connector for the test */
+ for (int i = 0; i < res->count_connectors; i++) {
+ connector = drmModeGetConnectorCurrent(fd, res->connectors[i]);
+ if (connector->connection == DRM_MODE_CONNECTED)
+ nr_connected++;
+ drmModeFreeConnector(connector);
+ }
+ igt_require(nr_connected > 0);
+
+ /* set graphics mode to prevent blanking */
+ kmstest_set_vt_graphics_mode();
+
+ /* benchmark nops */
+ n_display = stable_nop_on_ring(fd, handle, I915_EXEC_DEFAULT, 1, 5);
+ igt_info("With one display connected: %.2fus\n",
+ n_display * 1e6);
+
+ /* force all connectors off */
+ kmstest_unset_all_crtcs(fd, res);
+
+ /* benchmark nops again */
+ n_headless = stable_nop_on_ring(fd, handle, I915_EXEC_DEFAULT, 1, 5);
+ igt_info("Without a display connected (headless): %.2fus\n",
+ n_headless * 1e6);
+
+ /* check that the two execution speeds are roughly the same */
+ assert_within_epsilon(n_headless, n_display, 0.1f);
+}
+
+static void parallel(int fd, uint32_t handle, int timeout)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ unsigned engines[16];
+ const char *names[16];
+ unsigned nengine;
+ unsigned engine;
+ unsigned long count;
+ double time, sum;
+
+ sum = 0;
+ nengine = 0;
+ for_each_physical_engine(fd, engine) {
+ engines[nengine] = engine;
+ names[nengine] = e__->name;
+ nengine++;
+
+ time = nop_on_ring(fd, handle, engine, 1, &count) / count;
+ sum += time;
+ igt_debug("%s: %.3fus\n", e__->name, 1e6*time);
+ }
+ igt_require(nengine);
+ igt_info("average (individually): %.3fus\n", sum/nengine*1e6);
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = handle;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+ if (__gem_execbuf(fd, &execbuf)) {
+ execbuf.flags = 0;
+ gem_execbuf(fd, &execbuf);
+ }
+ intel_detect_and_clear_missed_interrupts(fd);
+
+ igt_fork(child, nengine) {
+ struct timespec start, now;
+
+ execbuf.flags &= ~ENGINE_FLAGS;
+ execbuf.flags |= engines[child];
+
+ count = 0;
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ do {
+ for (int loop = 0; loop < 1024; loop++)
+ gem_execbuf(fd, &execbuf);
+ count += 1024;
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ } while (elapsed(&start, &now) < timeout);
+ time = elapsed(&start, &now) / count;
+ igt_info("%s: %ld cycles, %.3fus\n", names[child], count, 1e6*time);
+ }
+
+ igt_waitchildren();
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+static void series(int fd, uint32_t handle, int timeout)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ struct timespec start, now, sync;
+ unsigned engines[16];
+ unsigned nengine;
+ unsigned engine;
+ unsigned long count;
+ double time, max = 0, min = HUGE_VAL, sum = 0;
+ const char *name;
+
+ nengine = 0;
+ for_each_physical_engine(fd, engine) {
+ time = nop_on_ring(fd, handle, engine, 1, &count) / count;
+ if (time > max) {
+ name = e__->name;
+ max = time;
+ }
+ if (time < min)
+ min = time;
+ sum += time;
+ engines[nengine++] = engine;
+ }
+ igt_require(nengine);
+ igt_info("Maximum execution latency on %s, %.3fus, min %.3fus, total %.3fus per cycle, average %.3fus\n",
+ name, max*1e6, min*1e6, sum*1e6, sum/nengine*1e6);
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = handle;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+ if (__gem_execbuf(fd, &execbuf)) {
+ execbuf.flags = 0;
+ gem_execbuf(fd, &execbuf);
+ }
+ intel_detect_and_clear_missed_interrupts(fd);
+
+ count = 0;
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ do {
+ for (int loop = 0; loop < 1024; loop++) {
+ for (int n = 0; n < nengine; n++) {
+ execbuf.flags &= ~ENGINE_FLAGS;
+ execbuf.flags |= engines[n];
+ gem_execbuf(fd, &execbuf);
+ }
+ }
+ count += nengine * 1024;
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ } while (elapsed(&start, &now) < timeout); /* Hang detection ~120s */
+ gem_sync(fd, handle);
+ clock_gettime(CLOCK_MONOTONIC, &sync);
+ igt_debug("sync time: %.3fus\n", elapsed(&now, &sync)*1e6);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+
+ time = elapsed(&start, &now) / count;
+ igt_info("All (%d engines): %'lu cycles, average %.3fus per cycle [expected %.3fus]\n",
+ nengine, count, 1e6*time, 1e6*((max-min)/nengine+min));
+
+ /* The rate limiting step should be how fast the slowest engine can
+ * execute its queue of requests, as when we wait upon a full ring all
+ * dispatch is frozen. So in general we cannot go faster than the
+ * slowest engine (but as all engines are in lockstep, they should all
+ * be executing in parallel and so the average should be max/nengines),
+ * but we should equally not go any slower.
+ *
+ * However, that depends upon being able to submit fast enough, and
+ * that in turn depends upon debugging being turned off and no bottlenecks
+ * within the driver. We cannot assert that we hit ideal conditions
+ * across all engines, so we only look for an outrageous error
+ * condition.
+ */
+ igt_assert_f(time < 2*sum,
+ "Average time (%.3fus) exceeds expectation for parallel execution (min %.3fus, max %.3fus; limit set at %.3fus)\n",
+ 1e6*time, 1e6*min, 1e6*max, 1e6*sum*2);
+}
+
+static void xchg(void *array, unsigned i, unsigned j)
+{
+ unsigned *u = array;
+ unsigned tmp = u[i];
+ u[i] = u[j];
+ u[j] = tmp;
+}
+
+static void sequential(int fd, uint32_t handle, unsigned flags, int timeout)
+{
+ const int ncpus = flags & FORKED ? sysconf(_SC_NPROCESSORS_ONLN) : 1;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[2];
+ unsigned engines[16];
+ unsigned nengine;
+ double *results;
+ double time, sum;
+ unsigned n;
+
+ gem_require_contexts(fd);
+
+ results = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(results != MAP_FAILED);
+
+ nengine = 0;
+ sum = 0;
+ for_each_physical_engine(fd, n) {
+ unsigned long count;
+
+ time = nop_on_ring(fd, handle, n, 1, &count) / count;
+ sum += time;
+ igt_debug("%s: %.3fus\n", e__->name, 1e6*time);
+
+ engines[nengine++] = n;
+ }
+ igt_require(nengine);
+ igt_info("Total (individual) execution latency %.3fus per cycle\n",
+ 1e6*sum);
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = gem_create(fd, 4096);
+ obj[0].flags = EXEC_OBJECT_WRITE;
+ obj[1].handle = handle;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+ igt_require(__gem_execbuf(fd, &execbuf) == 0);
+
+ if (flags & CONTEXT) {
+ uint32_t id;
+
+ igt_require(__gem_context_create(fd, &id) == 0);
+ execbuf.rsvd1 = id;
+ }
+
+ for (n = 0; n < nengine; n++) {
+ execbuf.flags &= ~ENGINE_FLAGS;
+ execbuf.flags |= engines[n];
+ igt_require(__gem_execbuf(fd, &execbuf) == 0);
+ }
+
+ intel_detect_and_clear_missed_interrupts(fd);
+
+ igt_fork(child, ncpus) {
+ struct timespec start, now;
+ unsigned long count;
+
+ obj[0].handle = gem_create(fd, 4096);
+ gem_execbuf(fd, &execbuf);
+
+ if (flags & CONTEXT)
+ execbuf.rsvd1 = gem_context_create(fd);
+
+ hars_petruska_f54_1_random_perturb(child);
+
+ count = 0;
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ do {
+ igt_permute_array(engines, nengine, xchg);
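+ /* CHAINED submits a burst of 1024 nops to each engine in turn;
+ * otherwise a single nop is interleaved across every engine per
+ * iteration.
+ */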
+ if (flags & CHAINED) {
+ for (n = 0; n < nengine; n++) {
+ execbuf.flags &= ~ENGINE_FLAGS;
+ execbuf.flags |= engines[n];
+ for (int loop = 0; loop < 1024; loop++)
+ gem_execbuf(fd, &execbuf);
+ }
+ } else {
+ for (int loop = 0; loop < 1024; loop++) {
+ for (n = 0; n < nengine; n++) {
+ execbuf.flags &= ~ENGINE_FLAGS;
+ execbuf.flags |= engines[n];
+ gem_execbuf(fd, &execbuf);
+ }
+ }
+ }
+ count += 1024;
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ } while (elapsed(&start, &now) < timeout); /* Hang detection ~120s */
+
+ gem_sync(fd, obj[0].handle);
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ results[child] = elapsed(&start, &now) / count;
+
+ if (flags & CONTEXT)
+ gem_context_destroy(fd, execbuf.rsvd1);
+
+ gem_close(fd, obj[0].handle);
+ }
+ igt_waitchildren();
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+
+ results[ncpus] = 0;
+ for (n = 0; n < ncpus; n++)
+ results[ncpus] += results[n];
+ results[ncpus] /= ncpus;
+
+ igt_info("Sequential (%d engines, %d processes): average %.3fus per cycle [expected %.3fus]\n",
+ nengine, ncpus, 1e6*results[ncpus], 1e6*sum*ncpus);
+
+ if (flags & CONTEXT)
+ gem_context_destroy(fd, execbuf.rsvd1);
+
+ gem_close(fd, obj[0].handle);
+ munmap(results, 4096);
+}
+
+#define LOCAL_EXEC_FENCE_OUT (1 << 17)
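+/* A zero-timeout poll() asks the kernel to enable signaling on the exported
+ * fence without blocking; returns true if the fence had not yet signaled.
+ */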
+static bool fence_enable_signaling(int fence)
+{
+ return poll(&(struct pollfd){fence, POLLIN}, 1, 0) == 0;
+}
+
+static bool fence_wait(int fence)
+{
+ return poll(&(struct pollfd){fence, POLLIN}, 1, -1) == 1;
+}
+
+static void fence_signal(int fd, uint32_t handle,
+ unsigned ring_id, const char *ring_name,
+ int timeout)
+{
+#define NFENCES 512
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ struct timespec start, now;
+ unsigned engines[16];
+ unsigned nengine;
+ int *fences, n;
+ unsigned long count, signal;
+
+ igt_require(gem_has_exec_fence(fd));
+
+ nengine = 0;
+ if (ring_id == ALL_ENGINES) {
+ for_each_physical_engine(fd, n)
+ engines[nengine++] = n;
+ } else {
+ gem_require_ring(fd, ring_id);
+ engines[nengine++] = ring_id;
+ }
+ igt_require(nengine);
+
+ fences = malloc(sizeof(*fences) * NFENCES);
+ igt_assert(fences);
+ memset(fences, -1, sizeof(*fences) * NFENCES);
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = handle;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = LOCAL_EXEC_FENCE_OUT;
+
+ n = 0;
+ count = 0;
+ signal = 0;
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ do {
+ for (int loop = 0; loop < 1024; loop++) {
+ for (int e = 0; e < nengine; e++) {
+ if (fences[n] != -1) {
+ igt_assert(fence_wait(fences[n]));
+ close(fences[n]);
+ }
+
+ execbuf.flags &= ~ENGINE_FLAGS;
+ execbuf.flags |= engines[e];
+ gem_execbuf_wr(fd, &execbuf);
+
+ /* Enable signaling by doing a poll() */
+ fences[n] = execbuf.rsvd2 >> 32;
+ signal += fence_enable_signaling(fences[n]);
+
+ n = (n + 1) % NFENCES;
+ }
+ }
+
+ count += 1024 * nengine;
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ } while (elapsed(&start, &now) < timeout);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+
+ for (n = 0; n < NFENCES; n++)
+ if (fences[n] != -1)
+ close(fences[n]);
+ free(fences);
+
+ igt_info("Signal %s: %'lu cycles (%'lu signals): %.3fus\n",
+ ring_name, count, signal, elapsed(&start, &now) * 1e6 / count);
+}
+
+static void preempt(int fd, uint32_t handle,
+ unsigned ring_id, const char *ring_name)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ struct timespec start, now;
+ unsigned long count;
+ uint32_t ctx[2];
+
+ gem_require_ring(fd, ring_id);
+
+ ctx[0] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[0], MIN_PRIO);
+
+ ctx[1] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[1], MAX_PRIO);
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = handle;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = ring_id;
+ execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+ if (__gem_execbuf(fd, &execbuf)) {
+ execbuf.flags = ring_id;
+ gem_execbuf(fd, &execbuf);
+ }
+ execbuf.rsvd1 = ctx[1];
+ intel_detect_and_clear_missed_interrupts(fd);
+
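+ /* Measure nop throughput from the high priority context while a low
+ * priority spinner occupies the engine, so the measurement includes
+ * the overhead of preempting lower priority work.
+ */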
+ count = 0;
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ do {
+ igt_spin_t *spin =
+ __igt_spin_batch_new(fd,
+ .ctx = ctx[0],
+ .engine = ring_id);
+
+ for (int loop = 0; loop < 1024; loop++)
+ gem_execbuf(fd, &execbuf);
+
+ igt_spin_batch_free(fd, spin);
+
+ count += 1024;
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ } while (elapsed(&start, &now) < 20);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+
+ gem_context_destroy(fd, ctx[1]);
+ gem_context_destroy(fd, ctx[0]);
+
+ igt_info("%s: %'lu cycles: %.3fus\n",
+ ring_name, count, elapsed(&start, &now)*1e6 / count);
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ uint32_t handle = 0;
+ int device = -1;
+
+ igt_fixture {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+
+ device = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(device);
+ gem_submission_print_method(device);
+ gem_scheduler_print_capability(device);
+
+ handle = gem_create(device, 4096);
+ gem_write(device, handle, 0, &bbe, sizeof(bbe));
+
+ igt_fork_hang_detector(device);
+ }
+
+ igt_subtest("basic-series")
+ series(device, handle, 5);
+
+ igt_subtest("basic-parallel")
+ parallel(device, handle, 5);
+
+ igt_subtest("basic-sequential")
+ sequential(device, handle, 0, 5);
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_f("%s", e->name)
+ single(device, handle, e->exec_id | e->flags, e->name);
+ igt_subtest_f("signal-%s", e->name)
+ fence_signal(device, handle, e->exec_id | e->flags, e->name, 5);
+ }
+
+ igt_subtest("signal-all")
+ fence_signal(device, handle, ALL_ENGINES, "all", 150);
+
+ igt_subtest("series")
+ series(device, handle, 150);
+
+ igt_subtest("parallel")
+ parallel(device, handle, 150);
+
+ igt_subtest("sequential")
+ sequential(device, handle, 0, 150);
+
+ igt_subtest("forked-sequential")
+ sequential(device, handle, FORKED, 150);
+
+ igt_subtest("chained-sequential")
+ sequential(device, handle, FORKED | CHAINED, 150);
+
+ igt_subtest("context-sequential")
+ sequential(device, handle, FORKED | CONTEXT, 150);
+
+ igt_subtest_group {
+ igt_fixture {
+ gem_require_contexts(device);
+ igt_require(gem_scheduler_has_ctx_priority(device));
+ igt_require(gem_scheduler_has_preemption(device));
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_f("preempt-%s", e->name)
+ preempt(device, handle, e->exec_id | e->flags, e->name);
+ }
+ }
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_device_set_master(device);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ /* Requires master for STORE_DWORD on gen4/5 */
+ igt_subtest_f("poll-%s", e->name)
+ poll_ring(device,
+ e->exec_id | e->flags, e->name, 20);
+ }
+
+ igt_subtest("poll-sequential")
+ poll_ring(device, ALL_ENGINES, "Sequential", 20);
+
+ igt_subtest("headless") {
+ /* Requires master for changing display modes */
+ headless(device, handle);
+ }
+ }
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ gem_close(device, handle);
+ close(device);
+ }
+}
diff --git a/tests/i915/gem_exec_parallel.c b/tests/i915/gem_exec_parallel.c
new file mode 100644
index 00000000..a6fa698e
--- /dev/null
+++ b/tests/i915/gem_exec_parallel.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/** @file gem_exec_parallel.c
+ *
+ * Exercise using many, many writers into a buffer.
+ */
+
+#include <pthread.h>
+
+#include "igt.h"
+#include "igt_gt.h"
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define LOCAL_I915_EXEC_BSD_SHIFT (13)
+#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+#define VERIFY 0
+
+static void check_bo(int fd, uint32_t handle, int pass)
+{
+ uint32_t *map;
+ int i;
+
+ igt_debug("Verifying result (pass=%d, handle=%d)\n", pass, handle);
+ map = gem_mmap__cpu(fd, handle, 0, 4096, PROT_READ);
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, 0);
+ for (i = 0; i < 1024; i++)
+ igt_assert_eq(map[i], i);
+ munmap(map, 4096);
+}
+
+#define CONTEXTS 0x1
+#define FDS 0x2
+
+struct thread {
+ pthread_t thread;
+ pthread_mutex_t *mutex;
+ pthread_cond_t *cond;
+ unsigned flags;
+ uint32_t *scratch;
+ unsigned id;
+ unsigned engine;
+ int fd, gen, *go;
+};
+
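+/* Each worker stores its id into its own dword of every shared scratch
+ * buffer using MI_STORE_DWORD_IMM, so that once all 1024 threads have run,
+ * check_bo() can verify that dword i of each buffer reads back as i.
+ */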
+static void *thread(void *data)
+{
+ struct thread *t = data;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t batch[16];
+ int fd, i;
+
+ pthread_mutex_lock(t->mutex);
+ while (*t->go == 0)
+ pthread_cond_wait(t->cond, t->mutex);
+ pthread_mutex_unlock(t->mutex);
+
+ if (t->flags & FDS)
+ fd = drm_open_driver(DRIVER_INTEL);
+ else
+ fd = t->fd;
+
+ i = 0;
+ batch[i] = MI_STORE_DWORD_IMM | (t->gen < 6 ? 1 << 22 : 0);
+ if (t->gen >= 8) {
+ batch[++i] = 4*t->id;
+ batch[++i] = 0;
+ } else if (t->gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = 4*t->id;
+ } else {
+ batch[i]--;
+ batch[++i] = 4*t->id;
+ }
+ batch[++i] = t->id;
+ batch[++i] = MI_BATCH_BUFFER_END;
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].flags = EXEC_OBJECT_WRITE;
+
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.offset = sizeof(uint32_t);
+ if (t->gen < 8 && t->gen >= 4)
+ reloc.offset += sizeof(uint32_t);
+ reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.delta = 4*t->id;
+ obj[1].handle = gem_create(fd, 4096);
+ obj[1].relocs_ptr = to_user_pointer(&reloc);
+ obj[1].relocation_count = 1;
+ gem_write(fd, obj[1].handle, 0, batch, sizeof(batch));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags = t->engine;
+ execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+ if (t->gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+ if (t->flags & CONTEXTS)
+ execbuf.rsvd1 = gem_context_create(fd);
+
+ for (i = 0; i < 16; i++) {
+ obj[0].handle = t->scratch[i];
+ if (t->flags & FDS)
+ obj[0].handle = gem_open(fd, obj[0].handle);
+
+ gem_execbuf(fd, &execbuf);
+
+ if (t->flags & FDS)
+ gem_close(fd, obj[0].handle);
+ }
+
+ if (t->flags & CONTEXTS)
+ gem_context_destroy(fd, execbuf.rsvd1);
+ gem_close(fd, obj[1].handle);
+ if (t->flags & FDS)
+ close(fd);
+
+ return NULL;
+}
+
+static void all(int fd, unsigned engine, unsigned flags)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ struct thread *threads;
+ unsigned engines[16];
+ unsigned nengine;
+ uint32_t scratch[16], handle[16];
+ int go;
+ int i;
+
+ if (flags & CONTEXTS)
+ gem_require_contexts(fd);
+
+ if (flags & FDS)
+ igt_require(gen > 5);
+
+ nengine = 0;
+ if (engine == ALL_ENGINES) {
+ for_each_physical_engine(fd, engine) {
+ if (gem_can_store_dword(fd, engine))
+ engines[nengine++] = engine;
+ }
+ } else {
+ igt_require(gem_has_ring(fd, engine));
+ igt_require(gem_can_store_dword(fd, engine));
+ engines[nengine++] = engine;
+ }
+ igt_require(nengine);
+
+ for (i = 0; i < 16; i++) {
+ scratch[i] = handle[i] = gem_create(fd, 4096);
+ if (flags & FDS)
+ scratch[i] = gem_flink(fd, handle[i]);
+ }
+
+ threads = calloc(1024, sizeof(struct thread));
+ igt_assert(threads);
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ pthread_mutex_init(&mutex, 0);
+ pthread_cond_init(&cond, 0);
+ go = 0;
+
+ for (i = 0; i < 1024; i++) {
+ threads[i].id = i;
+ threads[i].fd = fd;
+ threads[i].gen = gen;
+ threads[i].engine = engines[i % nengine];
+ threads[i].flags = flags;
+ threads[i].scratch = scratch;
+ threads[i].mutex = &mutex;
+ threads[i].cond = &cond;
+ threads[i].go = &go;
+
+ pthread_create(&threads[i].thread, 0, thread, &threads[i]);
+ }
+
+ pthread_mutex_lock(&mutex);
+ go = 1024;
+ pthread_cond_broadcast(&cond);
+ pthread_mutex_unlock(&mutex);
+
+ for (i = 0; i < 1024; i++)
+ pthread_join(threads[i].thread, NULL);
+
+ for (i = 0; i < 16; i++) {
+ check_bo(fd, handle[i], i);
+ gem_close(fd, handle[i]);
+ }
+
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+ free(threads);
+}
+
+igt_main
+{
+ const struct mode {
+ const char *name;
+ unsigned flags;
+ } modes[] = {
+ { "", 0 },
+ { "contexts", CONTEXTS },
+ { "fds", FDS },
+ { NULL }
+ };
+ int fd;
+
+ igt_fixture {
+ fd = drm_open_driver_master(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ igt_fork_hang_detector(fd);
+ }
+
+ for (const struct mode *m = modes; m->name; m++)
+ igt_subtest_f("%s", *m->name ? m->name : "basic")
+ all(fd, ALL_ENGINES, m->flags);
+
+ for (const struct intel_execution_engine *e = intel_execution_engines;
+ e->name; e++) {
+ for (const struct mode *m = modes; m->name; m++)
+ igt_subtest_f("%s%s%s",
+ e->name,
+ *m->name ? "-" : "",
+ m->name)
+ all(fd, e->exec_id | e->flags, m->flags);
+ }
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_exec_params.c b/tests/i915/gem_exec_params.c
new file mode 100644
index 00000000..49c56a8d
--- /dev/null
+++ b/tests/i915/gem_exec_params.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter
+ *
+ */
+
+#include "igt.h"
+#include "igt_device.h"
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include "drm.h"
+
+
+#define LOCAL_I915_EXEC_VEBOX (4<<0)
+#define LOCAL_I915_EXEC_BSD_MASK (3<<13)
+#define LOCAL_I915_EXEC_BSD_RING1 (1<<13)
+#define LOCAL_I915_EXEC_BSD_RING2 (2<<13)
+#define LOCAL_I915_EXEC_RESOURCE_STREAMER (1<<15)
+#define LOCAL_I915_EXEC_FENCE_IN (1 << 16)
+#define LOCAL_I915_EXEC_FENCE_OUT (1 << 17)
+#define LOCAL_I915_EXEC_BATCH_FIRST (1 << 18)
+#define LOCAL_I915_EXEC_FENCE_ARRAY (1 << 19)
+
+static bool has_ring(int fd, unsigned ring_exec_flags)
+{
+ switch (ring_exec_flags & I915_EXEC_RING_MASK) {
+ case 0:
+ case I915_EXEC_RENDER:
+ return true;
+
+ case I915_EXEC_BSD:
+ if (ring_exec_flags & LOCAL_I915_EXEC_BSD_MASK)
+ return gem_has_bsd2(fd);
+ else
+ return gem_has_bsd(fd);
+
+ case I915_EXEC_BLT:
+ return gem_has_blt(fd);
+
+ case I915_EXEC_VEBOX:
+ return gem_has_vebox(fd);
+ }
+
+ igt_assert_f(0, "invalid exec flag 0x%x\n", ring_exec_flags);
+ return false;
+}
+
+static bool has_exec_batch_first(int fd)
+{
+ int val = -1;
+ struct drm_i915_getparam gp = {
+ .param = 48, /* I915_PARAM_HAS_EXEC_BATCH_FIRST */
+ .value = &val,
+ };
+ ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+ return val > 0;
+}
+
+static bool has_resource_streamer(int fd)
+{
+ int val = -1;
+ struct drm_i915_getparam gp = {
+ .param = I915_PARAM_HAS_RESOURCE_STREAMER,
+ .value = &val,
+ };
+ ioctl(fd, DRM_IOCTL_I915_GETPARAM , &gp);
+ return val > 0;
+}
+
+static void test_batch_first(int fd)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_relocation_entry reloc[2];
+ uint32_t *map, value;
+ int i;
+
+ igt_require(gem_can_store_dword(fd, 0));
+ igt_require(has_exec_batch_first(fd));
+
+ memset(obj, 0, sizeof(obj));
+ memset(reloc, 0, sizeof(reloc));
+
+ obj[0].handle = gem_create(fd, 4096);
+ obj[1].handle = gem_create(fd, 4096);
+ obj[2].handle = gem_create(fd, 4096);
+
+ reloc[0].target_handle = obj[1].handle;
+ reloc[0].offset = sizeof(uint32_t);
+ reloc[0].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[0].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+ obj[0].relocs_ptr = to_user_pointer(&reloc[0]);
+ obj[0].relocation_count = 1;
+
+ i = 0;
+ map = gem_mmap__cpu(fd, obj[0].handle, 0, 4096, PROT_WRITE);
+ gem_set_domain(fd, obj[0].handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ map[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ map[++i] = 0;
+ map[++i] = 0;
+ } else if (gen >= 4) {
+ map[++i] = 0;
+ map[++i] = 0;
+ reloc[0].offset += sizeof(uint32_t);
+ } else {
+ map[i]--;
+ map[++i] = 0;
+ }
+ map[++i] = 1;
+ map[++i] = MI_BATCH_BUFFER_END;
+ munmap(map, 4096);
+
+ reloc[1].target_handle = obj[1].handle;
+ reloc[1].offset = sizeof(uint32_t);
+ reloc[1].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[1].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+ obj[2].relocs_ptr = to_user_pointer(&reloc[1]);
+ obj[2].relocation_count = 1;
+
+ i = 0;
+ map = gem_mmap__cpu(fd, obj[2].handle, 0, 4096, PROT_WRITE);
+ gem_set_domain(fd, obj[2].handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ map[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ map[++i] = 0;
+ map[++i] = 0;
+ } else if (gen >= 4) {
+ map[++i] = 0;
+ map[++i] = 0;
+ reloc[1].offset += sizeof(uint32_t);
+ } else {
+ map[i]--;
+ map[++i] = 0;
+ }
+ map[++i] = 2;
+ map[++i] = MI_BATCH_BUFFER_END;
+ munmap(map, 4096);
+
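+ /* obj[0] and obj[2] are both complete batches that store a different
+ * value into obj[1]; reading obj[1] back tells us which object the
+ * kernel executed as the batch (the last by default, the first with
+ * I915_EXEC_BATCH_FIRST).
+ */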
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = ARRAY_SIZE(obj);
+ if (gen > 3 && gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ /* Normal mode */
+ gem_execbuf(fd, &execbuf);
+ gem_read(fd, obj[1].handle, 0, &value, sizeof(value));
+ igt_assert_eq_u32(value, 2);
+
+ /* Batch first mode */
+ execbuf.flags |= LOCAL_I915_EXEC_BATCH_FIRST;
+ gem_execbuf(fd, &execbuf);
+ gem_read(fd, obj[1].handle, 0, &value, sizeof(value));
+ igt_assert_eq_u32(value, 1);
+
+ gem_close(fd, obj[2].handle);
+ gem_close(fd, obj[1].handle);
+ gem_close(fd, obj[0].handle);
+}
+
+struct drm_i915_gem_execbuffer2 execbuf;
+struct drm_i915_gem_exec_object2 gem_exec[1];
+uint32_t batch[2] = {MI_BATCH_BUFFER_END};
+uint32_t handle, devid;
+int fd;
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ devid = intel_get_drm_devid(fd);
+
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, batch, sizeof(batch));
+
+ gem_exec[0].handle = handle;
+ gem_exec[0].relocation_count = 0;
+ gem_exec[0].relocs_ptr = 0;
+ gem_exec[0].alignment = 0;
+ gem_exec[0].offset = 0;
+ gem_exec[0].flags = 0;
+ gem_exec[0].rsvd1 = 0;
+ gem_exec[0].rsvd2 = 0;
+
+ execbuf.buffers_ptr = to_user_pointer(gem_exec);
+ execbuf.buffer_count = 1;
+ execbuf.batch_start_offset = 0;
+ execbuf.batch_len = 8;
+ execbuf.cliprects_ptr = 0;
+ execbuf.num_cliprects = 0;
+ execbuf.DR1 = 0;
+ execbuf.DR4 = 0;
+ execbuf.flags = 0;
+ i915_execbuffer2_set_context_id(execbuf, 0);
+ execbuf.rsvd2 = 0;
+ }
+
+ igt_subtest("control") {
+ for (e = intel_execution_engines; e->name; e++) {
+ if (has_ring(fd, e->exec_id | e->flags)) {
+ execbuf.flags = e->exec_id | e->flags;
+ gem_execbuf(fd, &execbuf);
+ }
+ }
+ }
+
+#define RUN_FAIL(expected_errno) do { \
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -expected_errno); \
+ } while(0)
+
+ igt_subtest("no-bsd") {
+ igt_require(!gem_has_bsd(fd));
+ execbuf.flags = I915_EXEC_BSD;
+ RUN_FAIL(EINVAL);
+ }
+ igt_subtest("no-blt") {
+ igt_require(!gem_has_blt(fd));
+ execbuf.flags = I915_EXEC_BLT;
+ RUN_FAIL(EINVAL);
+ }
+ igt_subtest("no-vebox") {
+ igt_require(!gem_has_vebox(fd));
+ execbuf.flags = LOCAL_I915_EXEC_VEBOX;
+ RUN_FAIL(EINVAL);
+ }
+ igt_subtest("invalid-ring") {
+ execbuf.flags = I915_EXEC_RING_MASK;
+ RUN_FAIL(EINVAL);
+ }
+
+ igt_subtest("invalid-ring2") {
+ execbuf.flags = LOCAL_I915_EXEC_VEBOX+1;
+ RUN_FAIL(EINVAL);
+ }
+
+ igt_subtest("invalid-bsd-ring") {
+ igt_require(gem_has_bsd2(fd));
+ execbuf.flags = I915_EXEC_BSD | LOCAL_I915_EXEC_BSD_MASK;
+ RUN_FAIL(EINVAL);
+ }
+
+ igt_subtest("invalid-bsd1-flag-on-render") {
+ execbuf.flags = I915_EXEC_RENDER | LOCAL_I915_EXEC_BSD_RING1;
+ RUN_FAIL(EINVAL);
+ }
+
+ igt_subtest("invalid-bsd2-flag-on-render") {
+ execbuf.flags = I915_EXEC_RENDER | LOCAL_I915_EXEC_BSD_RING2;
+ RUN_FAIL(EINVAL);
+ }
+
+ igt_subtest("invalid-bsd1-flag-on-blt") {
+ execbuf.flags = I915_EXEC_BLT | LOCAL_I915_EXEC_BSD_RING1;
+ RUN_FAIL(EINVAL);
+ }
+
+ igt_subtest("invalid-bsd2-flag-on-blt") {
+ execbuf.flags = I915_EXEC_BLT | LOCAL_I915_EXEC_BSD_RING2;
+ RUN_FAIL(EINVAL);
+ }
+
+ igt_subtest("invalid-bsd1-flag-on-vebox") {
+ igt_require(gem_has_vebox(fd));
+ execbuf.flags = LOCAL_I915_EXEC_VEBOX | LOCAL_I915_EXEC_BSD_RING1;
+ RUN_FAIL(EINVAL);
+ }
+
+ igt_subtest("invalid-bsd2-flag-on-vebox") {
+ igt_require(gem_has_vebox(fd));
+ execbuf.flags = LOCAL_I915_EXEC_VEBOX | LOCAL_I915_EXEC_BSD_RING2;
+ RUN_FAIL(EINVAL);
+ }
+
+ igt_subtest("rel-constants-invalid-ring") {
+ igt_require(gem_has_bsd(fd));
+ execbuf.flags = I915_EXEC_BSD | I915_EXEC_CONSTANTS_ABSOLUTE;
+ RUN_FAIL(EINVAL);
+ }
+
+ igt_subtest("rel-constants-invalid-rel-gen5") {
+ igt_require(intel_gen(devid) > 5);
+ execbuf.flags = I915_EXEC_RENDER | I915_EXEC_CONSTANTS_REL_SURFACE;
+ RUN_FAIL(EINVAL);
+ }
+
+ igt_subtest("rel-constants-invalid") {
+ execbuf.flags = I915_EXEC_RENDER | (I915_EXEC_CONSTANTS_REL_SURFACE+(1<<6));
+ RUN_FAIL(EINVAL);
+ }
+
+ igt_subtest("sol-reset-invalid") {
+ igt_require(gem_has_bsd(fd));
+ execbuf.flags = I915_EXEC_BSD | I915_EXEC_GEN7_SOL_RESET;
+ RUN_FAIL(EINVAL);
+ }
+
+ igt_subtest("sol-reset-not-gen7") {
+ igt_require(intel_gen(devid) != 7);
+ execbuf.flags = I915_EXEC_RENDER | I915_EXEC_GEN7_SOL_RESET;
+ RUN_FAIL(EINVAL);
+ }
+
+ igt_subtest("secure-non-root") {
+ igt_fork(child, 1) {
+ igt_drop_root();
+
+ execbuf.flags = I915_EXEC_RENDER | I915_EXEC_SECURE;
+ RUN_FAIL(EPERM);
+ }
+
+ igt_waitchildren();
+ }
+
+ igt_subtest("secure-non-master") {
+ igt_require(__igt_device_set_master(fd) == 0); /* Requires root privilege */
+
+ igt_device_drop_master(fd);
+ execbuf.flags = I915_EXEC_RENDER | I915_EXEC_SECURE;
+ RUN_FAIL(EPERM);
+
+ igt_device_set_master(fd);
+ gem_execbuf(fd, &execbuf);
+
+ igt_device_drop_master(fd); /* Only needs temporary master */
+ }
+
+ /* HANDLE_LUT and NO_RELOC are already exercised by gem_exec_lut_handle;
+ * correct usage of EXEC_FENCE_IN and EXEC_FENCE_OUT is tested by
+ * gem_exec_fence, and invalid usage of EXEC_FENCE_IN is tested below. */
+
+ igt_subtest("invalid-flag") {
+ /* NOTE: This test intentionally exercises the next available
+ * flag. Don't "fix" this testcase without adding the required
+ * tests for the new flag first. */
+ execbuf.flags = I915_EXEC_RENDER | (LOCAL_I915_EXEC_FENCE_ARRAY << 1);
+ RUN_FAIL(EINVAL);
+ }
+
+ /* rsvd1 aka context id is already exercised by gem_ctx_bad_exec */
+
+ igt_subtest("cliprects-invalid") {
+ igt_require(intel_gen(devid) >= 5);
+ execbuf.flags = 0;
+ execbuf.num_cliprects = 1;
+ RUN_FAIL(EINVAL);
+ execbuf.num_cliprects = 0;
+ }
+
+ igt_subtest("rs-invalid") {
+ bool has_rs = has_resource_streamer(fd);
+ unsigned int engine;
+
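+ /* The resource streamer may only be requested on the render (or
+ * default) ring, and only when the kernel reports support; every
+ * other combination must be rejected with -EINVAL.
+ */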
+ for_each_engine(fd, engine) {
+ int expect = -EINVAL;
+ if (has_rs && (engine == 0 || engine == I915_EXEC_RENDER))
+ expect = 0;
+
+ execbuf.flags = engine | LOCAL_I915_EXEC_RESOURCE_STREAMER;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), expect);
+ }
+ }
+
+ igt_subtest("invalid-fence-in") {
+ igt_require(gem_has_exec_fence(fd));
+ execbuf.flags = LOCAL_I915_EXEC_FENCE_IN;
+ execbuf.rsvd2 = -1;
+ RUN_FAIL(EINVAL);
+ execbuf.rsvd2 = fd;
+ RUN_FAIL(EINVAL);
+ }
+
+ igt_subtest("rsvd2-dirt") {
+ igt_require(!gem_has_exec_fence(fd));
+ execbuf.flags = 0;
+ execbuf.rsvd2 = 1;
+ RUN_FAIL(EINVAL);
+ execbuf.rsvd2 = 0;
+ }
+
+ igt_subtest("batch-first")
+ test_batch_first(fd);
+
+#define DIRT(name) \
+ igt_subtest(#name "-dirt") { \
+ execbuf.flags = 0; \
+ execbuf.name = 1; \
+ RUN_FAIL(EINVAL); \
+ execbuf.name = 0; \
+ }
+
+ DIRT(cliprects_ptr);
+ DIRT(DR1);
+ DIRT(DR4);
+#undef DIRT
+
+#undef RUN_FAIL
+
+ igt_fixture {
+ gem_close(fd, handle);
+
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_exec_parse.c b/tests/i915/gem_exec_parse.c
new file mode 100644
index 00000000..b653b1bd
--- /dev/null
+++ b/tests/i915/gem_exec_parse.c
@@ -0,0 +1,674 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include <drm.h>
+
+#ifndef I915_PARAM_CMD_PARSER_VERSION
+#define I915_PARAM_CMD_PARSER_VERSION 28
+#endif
+
+#define DERRMR 0x44050
+#define OASTATUS2 0x2368
+#define OACONTROL 0x2360
+#define SO_WRITE_OFFSET_0 0x5280
+
+#define HSW_CS_GPR(n) (0x2600 + 8*(n))
+#define HSW_CS_GPR0 HSW_CS_GPR(0)
+#define HSW_CS_GPR1 HSW_CS_GPR(1)
+
+/* To help craft commands known to be invalid across all engines */
+#define INSTR_CLIENT_SHIFT 29
+#define INSTR_INVALID_CLIENT 0x7
+
+#define MI_LOAD_REGISTER_REG (0x2a << 23)
+#define MI_STORE_REGISTER_MEM (0x24 << 23)
+#define MI_ARB_ON_OFF (0x8 << 23)
+#define MI_DISPLAY_FLIP ((0x14 << 23) | 1)
+
+#define GFX_OP_PIPE_CONTROL ((0x3<<29)|(0x3<<27)|(0x2<<24)|2)
+#define PIPE_CONTROL_QW_WRITE (1<<14)
+#define PIPE_CONTROL_LRI_POST_OP (1<<23)
+
+static int parser_version;
+
+static int command_parser_version(int fd)
+{
+ int version = -1;
+ drm_i915_getparam_t gp;
+
+ gp.param = I915_PARAM_CMD_PARSER_VERSION;
+ gp.value = &version;
+
+ if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
+ return version;
+
+ return -1;
+}
+
+static uint64_t __exec_batch_patched(int fd, uint32_t cmd_bo, uint32_t *cmds,
+ int size, int patch_offset)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc[1];
+
+ uint32_t target_bo = gem_create(fd, 4096);
+ uint64_t actual_value = 0;
+
+ gem_write(fd, cmd_bo, 0, cmds, size);
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = target_bo;
+ obj[1].handle = cmd_bo;
+
+ memset(reloc, 0, sizeof(reloc));
+ reloc[0].offset = patch_offset;
+ reloc[0].target_handle = obj[0].handle;
+ reloc[0].delta = 0;
+ reloc[0].read_domains = I915_GEM_DOMAIN_COMMAND;
+ reloc[0].write_domain = I915_GEM_DOMAIN_COMMAND;
+ obj[1].relocs_ptr = to_user_pointer(reloc);
+ obj[1].relocation_count = 1;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.batch_len = size;
+ execbuf.flags = I915_EXEC_RENDER;
+
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, cmd_bo);
+
+ gem_read(fd, target_bo, 0, &actual_value, sizeof(actual_value));
+
+ gem_close(fd, target_bo);
+
+ return actual_value;
+}
+
+static void exec_batch_patched(int fd, uint32_t cmd_bo, uint32_t *cmds,
+ int size, int patch_offset,
+ uint64_t expected_value)
+{
+ igt_assert_eq(__exec_batch_patched(fd, cmd_bo, cmds,
+ size, patch_offset),
+ expected_value);
+}
+
+static int __exec_batch(int fd, uint32_t cmd_bo, uint32_t *cmds,
+ int size, int ring)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[1];
+
+ gem_write(fd, cmd_bo, 0, cmds, size);
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = cmd_bo;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 1;
+ execbuf.batch_len = size;
+ execbuf.flags = ring;
+
+ return __gem_execbuf(fd, &execbuf);
+}
+#define exec_batch(fd, bo, cmds, sz, ring, expected) \
+ igt_assert_eq(__exec_batch(fd, bo, cmds, sz, ring), expected)
+
+static void exec_split_batch(int fd, uint32_t *cmds,
+ int size, int ring, int expected_ret)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[1];
+ uint32_t cmd_bo;
+ uint32_t noop[1024] = { 0 };
+ const int alloc_size = 4096 * 2;
+ const int actual_start_offset = 4096-sizeof(uint32_t);
+
+ /* Allocate and fill a 2-page batch with noops */
+ cmd_bo = gem_create(fd, alloc_size);
+ gem_write(fd, cmd_bo, 0, noop, sizeof(noop));
+ gem_write(fd, cmd_bo, 4096, noop, sizeof(noop));
+
+ /* Write the provided commands such that the first dword
+ * of the command buffer is the last dword of the first
+ * page (i.e. the command is split across the two pages).
+ */
+ gem_write(fd, cmd_bo, actual_start_offset, cmds, size);
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = cmd_bo;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 1;
+ /* NB: We want batch_start_offset and batch_len to point to the block
+ * of the actual commands (i.e. at the last dword of the first page),
+ * but have to adjust both the start offset and length to meet the
+ * kernel driver's requirements on the alignment of those fields.
+ */
+ execbuf.batch_start_offset = actual_start_offset & ~0x7;
+ execbuf.batch_len =
+ ALIGN(size + actual_start_offset - execbuf.batch_start_offset,
+ 0x8);
+ execbuf.flags = ring;
+
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), expected_ret);
+
+ gem_sync(fd, cmd_bo);
+ gem_close(fd, cmd_bo);
+}
+
+static void exec_batch_chained(int fd, uint32_t cmd_bo, uint32_t *cmds,
+ int size, int patch_offset,
+ uint64_t expected_value)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_relocation_entry reloc[1];
+ struct drm_i915_gem_relocation_entry first_level_reloc;
+
+ uint32_t target_bo = gem_create(fd, 4096);
+ uint32_t first_level_bo = gem_create(fd, 4096);
+ uint64_t actual_value = 0;
+
+ static uint32_t first_level_cmds[] = {
+ MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965,
+ 0,
+ MI_BATCH_BUFFER_END,
+ 0,
+ };
+
+ if (IS_HASWELL(intel_get_drm_devid(fd)))
+ first_level_cmds[0] |= MI_BATCH_NON_SECURE_HSW;
+
+ gem_write(fd, first_level_bo, 0,
+ first_level_cmds, sizeof(first_level_cmds));
+ gem_write(fd, cmd_bo, 0, cmds, size);
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = target_bo;
+ obj[1].handle = cmd_bo;
+ obj[2].handle = first_level_bo;
+
+ memset(reloc, 0, sizeof(reloc));
+ reloc[0].offset = patch_offset;
+ reloc[0].delta = 0;
+ reloc[0].target_handle = target_bo;
+ reloc[0].read_domains = I915_GEM_DOMAIN_COMMAND;
+ reloc[0].write_domain = I915_GEM_DOMAIN_COMMAND;
+ obj[1].relocation_count = 1;
+ obj[1].relocs_ptr = to_user_pointer(&reloc);
+
+ memset(&first_level_reloc, 0, sizeof(first_level_reloc));
+ first_level_reloc.offset = 4;
+ first_level_reloc.delta = 0;
+ first_level_reloc.target_handle = cmd_bo;
+ first_level_reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
+ first_level_reloc.write_domain = 0;
+ obj[2].relocation_count = 1;
+ obj[2].relocs_ptr = to_user_pointer(&first_level_reloc);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 3;
+ execbuf.batch_len = sizeof(first_level_cmds);
+ execbuf.flags = I915_EXEC_RENDER;
+
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, cmd_bo);
+
+ gem_read(fd, target_bo, 0, &actual_value, sizeof(actual_value));
+ igt_assert_eq(expected_value, actual_value);
+
+ gem_close(fd, first_level_bo);
+ gem_close(fd, target_bo);
+}
+
+/* Be careful to take into account what register bits we can store and read
+ * from...
+ */
+struct test_lri {
+ const char *name; /* register name for debug info */
+ uint32_t reg; /* address to test */
+ uint32_t read_mask; /* ignore things like HW status bits */
+ uint32_t init_val; /* initial identifiable value to set without LRI */
+ uint32_t test_val; /* value to attempt loading via LRI command */
+ bool whitelisted; /* expect to become NOOP / fail if not whitelisted */
+ int min_ver; /* required command parser version to test */
+};
+
+static void
+test_lri(int fd, uint32_t handle, struct test_lri *test)
+{
+ uint32_t lri[] = {
+ MI_LOAD_REGISTER_IMM,
+ test->reg,
+ test->test_val,
+ MI_BATCH_BUFFER_END,
+ };
+ int bad_lri_errno = parser_version >= 8 ? 0 : -EINVAL;
+ int expected_errno = test->whitelisted ? 0 : bad_lri_errno;
+ uint32_t expect = test->whitelisted ? test->test_val : test->init_val;
+
+ igt_debug("Testing %s LRI: addr=%x, val=%x, expected errno=%d, expected val=%x\n",
+ test->name, test->reg, test->test_val,
+ expected_errno, expect);
+
+ intel_register_write(test->reg, test->init_val);
+
+ igt_assert_eq_u32((intel_register_read(test->reg) &
+ test->read_mask),
+ test->init_val);
+
+ exec_batch(fd, handle,
+ lri, sizeof(lri),
+ I915_EXEC_RENDER,
+ expected_errno);
+ gem_sync(fd, handle);
+
+ igt_assert_eq_u32((intel_register_read(test->reg) &
+ test->read_mask),
+ expect);
+}
+
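+/* Exercise the command parser with randomised batch offsets and lengths
+ * within objects ranging from 4KiB up to 256MiB, each page terminated by
+ * MI_BATCH_BUFFER_END, to stress its batch copying and allocation paths.
+ */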
+static void test_allocations(int fd)
+{
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[17];
+ int i, j;
+
+ intel_require_memory(2, 1ull<<(12 + ARRAY_SIZE(obj)), CHECK_RAM);
+
+ memset(obj, 0, sizeof(obj));
+ for (i = 0; i < ARRAY_SIZE(obj); i++) {
+ uint64_t size = 1ull << (12 + i);
+
+ obj[i].handle = gem_create(fd, size);
+ for (uint64_t page = 4096; page <= size; page += 4096)
+ gem_write(fd, obj[i].handle,
+ page - sizeof(bbe), &bbe, sizeof(bbe));
+ }
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffer_count = 1;
+ for (j = 0; j < 16384; j++) {
+ igt_progress("allocations ", j, 16384);
+ i = rand() % ARRAY_SIZE(obj);
+ execbuf.buffers_ptr = to_user_pointer(&obj[i]);
+ execbuf.batch_start_offset = (rand() % (1ull<<i)) << 12;
+ execbuf.batch_start_offset += 64 * (rand() % 64);
+ execbuf.batch_len = (1ull<<(12+i)) - execbuf.batch_start_offset;
+ gem_execbuf(fd, &execbuf);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(obj); i++) {
+ gem_sync(fd, obj[i].handle);
+ gem_close(fd, obj[i].handle);
+ }
+}
+
+static void hsw_load_register_reg(void)
+{
+ uint32_t init_gpr0[16] = {
+ MI_LOAD_REGISTER_IMM | (3 - 2),
+ HSW_CS_GPR0,
+ 0xabcdabc0, /* leave [1:0] zero */
+ MI_BATCH_BUFFER_END,
+ };
+ uint32_t store_gpr0[16] = {
+ MI_STORE_REGISTER_MEM | (3 - 2),
+ HSW_CS_GPR0,
+ 0, /* reloc */
+ MI_BATCH_BUFFER_END,
+ };
+ uint32_t do_lrr[16] = {
+ MI_LOAD_REGISTER_REG | (3 - 2),
+ 0, /* [1] = src */
+ HSW_CS_GPR0, /* dst */
+ MI_BATCH_BUFFER_END,
+ };
+ uint32_t allowed_regs[] = {
+ HSW_CS_GPR1,
+ SO_WRITE_OFFSET_0,
+ };
+ uint32_t disallowed_regs[] = {
+ 0,
+ OACONTROL, /* filtered */
+ DERRMR, /* master only */
+ 0x2038, /* RING_START: invalid */
+ };
+ int fd;
+ uint32_t handle;
+ int bad_lrr_errno = parser_version >= 8 ? 0 : -EINVAL;
+
+ /* Open again to get a non-master file descriptor */
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ igt_require(IS_HASWELL(intel_get_drm_devid(fd)));
+ igt_require(parser_version >= 7);
+
+ handle = gem_create(fd, 4096);
+
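+ /* For each source register: seed GPR0 with a known value via LRI and
+ * confirm it with a store to memory, then attempt MI_LOAD_REGISTER_REG
+ * into GPR0. An allowed source must change GPR0, while a rejected
+ * source is refused (or NOOPed by newer parsers) and GPR0 keeps the
+ * seed value.
+ */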
+ for (int i = 0 ; i < ARRAY_SIZE(allowed_regs); i++) {
+ uint32_t var;
+
+ exec_batch(fd, handle, init_gpr0, sizeof(init_gpr0),
+ I915_EXEC_RENDER,
+ 0);
+ exec_batch_patched(fd, handle,
+ store_gpr0, sizeof(store_gpr0),
+ 2 * sizeof(uint32_t), /* reloc */
+ 0xabcdabc0);
+ do_lrr[1] = allowed_regs[i];
+ exec_batch(fd, handle, do_lrr, sizeof(do_lrr),
+ I915_EXEC_RENDER,
+ 0);
+ var = __exec_batch_patched(fd, handle,
+ store_gpr0, sizeof(store_gpr0),
+ 2 * sizeof(uint32_t)); /* reloc */
+ igt_assert_neq(var, 0xabcdabc0);
+ }
+
+ for (int i = 0 ; i < ARRAY_SIZE(disallowed_regs); i++) {
+ exec_batch(fd, handle, init_gpr0, sizeof(init_gpr0),
+ I915_EXEC_RENDER,
+ 0);
+ exec_batch_patched(fd, handle,
+ store_gpr0, sizeof(store_gpr0),
+ 2 * sizeof(uint32_t), /* reloc */
+ 0xabcdabc0);
+ do_lrr[1] = disallowed_regs[i];
+ exec_batch(fd, handle, do_lrr, sizeof(do_lrr),
+ I915_EXEC_RENDER,
+ bad_lrr_errno);
+ exec_batch_patched(fd, handle,
+ store_gpr0, sizeof(store_gpr0),
+ 2 * sizeof(uint32_t), /* reloc */
+ 0xabcdabc0);
+ }
+
+ close(fd);
+}
+
+igt_main
+{
+ uint32_t handle;
+ int fd;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ parser_version = command_parser_version(fd);
+ igt_require(parser_version != -1);
+
+ igt_require(gem_uses_ppgtt(fd));
+
+ handle = gem_create(fd, 4096);
+
+ /* ATM cmd parser only exists on gen7. */
+ igt_require(intel_gen(intel_get_drm_devid(fd)) == 7);
+ igt_fork_hang_detector(fd);
+ }
+
+ igt_subtest("basic-allowed") {
+ uint32_t pc[] = {
+ GFX_OP_PIPE_CONTROL,
+ PIPE_CONTROL_QW_WRITE,
+ 0, /* To be patched */
+ 0x12000000,
+ 0,
+ MI_BATCH_BUFFER_END,
+ };
+ exec_batch_patched(fd, handle,
+ pc, sizeof(pc),
+ 8, /* patch offset, */
+ 0x12000000);
+ }
+
+ igt_subtest("basic-rejected") {
+ uint32_t invalid_cmd[] = {
+ INSTR_INVALID_CLIENT << INSTR_CLIENT_SHIFT,
+ MI_BATCH_BUFFER_END,
+ };
+ uint32_t invalid_set_context[] = {
+ MI_SET_CONTEXT | 32, /* invalid length */
+ MI_BATCH_BUFFER_END,
+ };
+ exec_batch(fd, handle,
+ invalid_cmd, sizeof(invalid_cmd),
+ I915_EXEC_RENDER,
+ -EINVAL);
+ exec_batch(fd, handle,
+ invalid_cmd, sizeof(invalid_cmd),
+ I915_EXEC_BSD,
+ -EINVAL);
+ if (gem_has_blt(fd)) {
+ exec_batch(fd, handle,
+ invalid_cmd, sizeof(invalid_cmd),
+ I915_EXEC_BLT,
+ -EINVAL);
+ }
+ if (gem_has_vebox(fd)) {
+ exec_batch(fd, handle,
+ invalid_cmd, sizeof(invalid_cmd),
+ I915_EXEC_VEBOX,
+ -EINVAL);
+ }
+
+ exec_batch(fd, handle,
+ invalid_set_context, sizeof(invalid_set_context),
+ I915_EXEC_RENDER,
+ -EINVAL);
+ }
+
+ igt_subtest("basic-allocation") {
+ test_allocations(fd);
+ }
+
+ igt_subtest_group {
+#define REG(R, MSK, INI, V, OK, MIN_V) { #R, R, MSK, INI, V, OK, MIN_V }
+ struct test_lri lris[] = {
+ /* dummy head pointer */
+ REG(OASTATUS2,
+ 0xffffff80, 0xdeadf000, 0xbeeff000, false, 0),
+ /* NB: [1:0] MBZ */
+ REG(SO_WRITE_OFFSET_0,
+ 0xfffffffc, 0xabcdabc0, 0xbeefbee0, true, 0),
+
+ /* It's really important for us to check that
+ * an LRI to OACONTROL doesn't result in an
+ * EINVAL error because Mesa attempts writing
+ * to OACONTROL to determine what extensions to
+ * expose and will abort() for execbuffer()
+ * errors.
+ *
+ * Mesa can gracefully recognise and handle the
+ * LRI becoming a NOOP.
+ *
+ * The test values represent dummy context IDs
+ * while leaving the OA unit disabled
+ */
+ REG(OACONTROL,
+ 0xfffff000, 0xfeed0000, 0x31337000, false, 9)
+ };
+#undef REG
+
+ igt_fixture {
+ intel_register_access_init(intel_get_pci_device(), 0, fd);
+ }
+
+ for (int i = 0; i < ARRAY_SIZE(lris); i++) {
+ igt_subtest_f("test-lri-%s", lris[i].name) {
+ igt_require_f(parser_version >= lris[i].min_ver,
+ "minimum required parser version for test = %d\n",
+ lris[i].min_ver);
+ test_lri(fd, handle, lris + i);
+ }
+ }
+
+ igt_fixture {
+ intel_register_access_fini();
+ }
+ }
+
+ igt_subtest("bitmasks") {
+ uint32_t pc[] = {
+ GFX_OP_PIPE_CONTROL,
+ (PIPE_CONTROL_QW_WRITE |
+ PIPE_CONTROL_LRI_POST_OP),
+ 0, /* To be patched */
+ 0x12000000,
+ 0,
+ MI_BATCH_BUFFER_END,
+ };
+ if (parser_version >= 8) {
+ /* Expect to read back zero since the command should be
+ * squashed to a NOOP
+ */
+ exec_batch_patched(fd, handle,
+ pc, sizeof(pc),
+ 8, /* patch offset, */
+ 0x0);
+ } else {
+ exec_batch(fd, handle,
+ pc, sizeof(pc),
+ I915_EXEC_RENDER,
+ -EINVAL);
+ }
+ }
+
+ igt_subtest("batch-without-end") {
+ uint32_t noop[1024] = { 0 };
+ exec_batch(fd, handle,
+ noop, sizeof(noop),
+ I915_EXEC_RENDER,
+ -EINVAL);
+ }
+
+ igt_subtest("cmd-crossing-page") {
+ uint32_t lri_ok[] = {
+ MI_LOAD_REGISTER_IMM,
+ SO_WRITE_OFFSET_0, /* allowed register address */
+ 0xdcbaabc0, /* [1:0] MBZ */
+ MI_BATCH_BUFFER_END,
+ };
+ uint32_t store_reg[] = {
+ MI_STORE_REGISTER_MEM | (3 - 2),
+ SO_WRITE_OFFSET_0,
+ 0, /* reloc */
+ MI_BATCH_BUFFER_END,
+ };
+ exec_split_batch(fd,
+ lri_ok, sizeof(lri_ok),
+ I915_EXEC_RENDER,
+ 0);
+ exec_batch_patched(fd, handle,
+ store_reg,
+ sizeof(store_reg),
+ 2 * sizeof(uint32_t), /* reloc */
+ 0xdcbaabc0);
+ }
+
+ igt_subtest("oacontrol-tracking") {
+ uint32_t lri_ok[] = {
+ MI_LOAD_REGISTER_IMM,
+ OACONTROL,
+ 0x31337000,
+ MI_LOAD_REGISTER_IMM,
+ OACONTROL,
+ 0x0,
+ MI_BATCH_BUFFER_END,
+ 0
+ };
+ uint32_t lri_bad[] = {
+ MI_LOAD_REGISTER_IMM,
+ OACONTROL,
+ 0x31337000,
+ MI_BATCH_BUFFER_END,
+ };
+ uint32_t lri_extra_bad[] = {
+ MI_LOAD_REGISTER_IMM,
+ OACONTROL,
+ 0x31337000,
+ MI_LOAD_REGISTER_IMM,
+ OACONTROL,
+ 0x0,
+ MI_LOAD_REGISTER_IMM,
+ OACONTROL,
+ 0x31337000,
+ MI_BATCH_BUFFER_END,
+ };
+
+ igt_require(parser_version < 9);
+
+ exec_batch(fd, handle,
+ lri_ok, sizeof(lri_ok),
+ I915_EXEC_RENDER,
+ 0);
+ exec_batch(fd, handle,
+ lri_bad, sizeof(lri_bad),
+ I915_EXEC_RENDER,
+ -EINVAL);
+ exec_batch(fd, handle,
+ lri_extra_bad, sizeof(lri_extra_bad),
+ I915_EXEC_RENDER,
+ -EINVAL);
+ }
+
+ igt_subtest("chained-batch") {
+ uint32_t pc[] = {
+ GFX_OP_PIPE_CONTROL,
+ PIPE_CONTROL_QW_WRITE,
+ 0, /* To be patched */
+ 0x12000000,
+ 0,
+ MI_BATCH_BUFFER_END,
+ };
+ exec_batch_chained(fd, handle,
+ pc, sizeof(pc),
+ 8, /* patch offset, */
+ 0x12000000);
+ }
+
+ igt_subtest("load-register-reg")
+ hsw_load_register_reg();
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ gem_close(fd, handle);
+
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_exec_reloc.c b/tests/i915/gem_exec_reloc.c
new file mode 100644
index 00000000..837f60a6
--- /dev/null
+++ b/tests/i915/gem_exec_reloc.c
@@ -0,0 +1,761 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+#include "igt_dummyload.h"
+
+IGT_TEST_DESCRIPTION("Basic sanity check of execbuf-ioctl relocations.");
+
+#define LOCAL_I915_EXEC_BSD_SHIFT (13)
+#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+static uint32_t find_last_set(uint64_t x)
+{
+ uint32_t i = 0;
+ while (x) {
+ x >>= 1;
+ i++;
+ }
+ return i;
+}
+
+static void write_dword(int fd,
+ uint32_t target_handle,
+ uint64_t target_offset,
+ uint32_t value)
+{
+ int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc;
+ uint32_t buf[16];
+ int i;
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = target_handle;
+ obj[1].handle = gem_create(fd, 4096);
+
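+ /* MI_STORE_DWORD_IMM layout differs per gen: gen8+ takes a 64-bit
+ * address, gen4-7 a zero dword followed by a 32-bit address, and
+ * gen2/3 a shorter form (command length reduced by one dword).
+ */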
+ i = 0;
+ buf[i++] = MI_STORE_DWORD_IMM | (gen < 6 ? 1<<22 : 0);
+ if (gen >= 8) {
+ buf[i++] = target_offset;
+ buf[i++] = target_offset >> 32;
+ } else if (gen >= 4) {
+ buf[i++] = 0;
+ buf[i++] = target_offset;
+ } else {
+ buf[i-1]--;
+ buf[i++] = target_offset;
+ }
+ buf[i++] = value;
+ buf[i++] = MI_BATCH_BUFFER_END;
+ gem_write(fd, obj[1].handle, 0, buf, sizeof(buf));
+
+ memset(&reloc, 0, sizeof(reloc));
+ if (gen >= 8 || gen < 4)
+ reloc.offset = sizeof(uint32_t);
+ else
+ reloc.offset = 2*sizeof(uint32_t);
+ reloc.target_handle = target_handle;
+ reloc.delta = target_offset;
+ reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+
+ obj[1].relocation_count = 1;
+ obj[1].relocs_ptr = to_user_pointer(&reloc);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags = I915_EXEC_SECURE;
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, obj[1].handle);
+}
+
+enum mode { MEM, CPU, WC, GTT };
+#define RO 0x100
+static void from_mmap(int fd, uint64_t size, enum mode mode)
+{
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_relocation_entry *relocs;
+ uint32_t reloc_handle;
+ uint64_t value;
+ uint64_t max, i;
+ int retry = 2;
+
+ /* Worst case is that the kernel has to copy the entire incoming
+ * reloc[], so double the memory requirements.
+ */
+ intel_require_memory(2, size, CHECK_RAM);
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ max = size / sizeof(*relocs);
+ switch (mode & ~RO) {
+ case MEM:
+ relocs = mmap(0, size,
+ PROT_WRITE, MAP_PRIVATE | MAP_ANON,
+ -1, 0);
+ igt_assert(relocs != MAP_FAILED);
+ break;
+ case GTT:
+ reloc_handle = gem_create(fd, size);
+ relocs = gem_mmap__gtt(fd, reloc_handle, size, PROT_WRITE);
+ gem_set_domain(fd, reloc_handle,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ gem_close(fd, reloc_handle);
+ break;
+ case CPU:
+ reloc_handle = gem_create(fd, size);
+ relocs = gem_mmap__cpu(fd, reloc_handle, 0, size, PROT_WRITE);
+ gem_set_domain(fd, reloc_handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ gem_close(fd, reloc_handle);
+ break;
+ case WC:
+ reloc_handle = gem_create(fd, size);
+ relocs = gem_mmap__wc(fd, reloc_handle, 0, size, PROT_WRITE);
+ gem_set_domain(fd, reloc_handle,
+ I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+ gem_close(fd, reloc_handle);
+ break;
+ }
+
+ for (i = 0; i < max; i++) {
+ relocs[i].target_handle = obj.handle;
+ relocs[i].presumed_offset = ~0ull;
+ relocs[i].offset = 1024;
+ relocs[i].delta = i;
+ relocs[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ relocs[i].write_domain = 0;
+ }
+ obj.relocation_count = max;
+ obj.relocs_ptr = to_user_pointer(relocs);
+
+ if (mode & RO)
+ mprotect(relocs, size, PROT_READ);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
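+ /* The kernel may be unable to write back presumed_offset (e.g. when
+ * the reloc array is mapped read-only), so retry the execbuf a couple
+ * of times and then carry on regardless.
+ */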
+ while (relocs[0].presumed_offset == ~0ull && retry--)
+ gem_execbuf(fd, &execbuf);
+ gem_read(fd, obj.handle, 1024, &value, sizeof(value));
+ gem_close(fd, obj.handle);
+
+ igt_assert_eq_u64(value, obj.offset + max - 1);
+ if (relocs[0].presumed_offset != ~0ull) {
+ for (i = 0; i < max; i++)
+ igt_assert_eq_u64(relocs[i].presumed_offset,
+ obj.offset);
+ }
+ munmap(relocs, size);
+}
+
+static void from_gpu(int fd)
+{
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_relocation_entry *relocs;
+ uint32_t reloc_handle;
+ uint64_t value;
+
+ igt_require(gem_can_store_dword(fd, 0));
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ reloc_handle = gem_create(fd, 4096);
+ write_dword(fd,
+ reloc_handle,
+ offsetof(struct drm_i915_gem_relocation_entry,
+ target_handle),
+ obj.handle);
+ write_dword(fd,
+ reloc_handle,
+ offsetof(struct drm_i915_gem_relocation_entry,
+ offset),
+ 1024);
+ write_dword(fd,
+ reloc_handle,
+ offsetof(struct drm_i915_gem_relocation_entry,
+ read_domains),
+ I915_GEM_DOMAIN_INSTRUCTION);
+
+ relocs = gem_mmap__cpu(fd, reloc_handle, 0, 4096, PROT_READ);
+ gem_set_domain(fd, reloc_handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ gem_close(fd, reloc_handle);
+
+ obj.relocation_count = 1;
+ obj.relocs_ptr = to_user_pointer(relocs);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ gem_execbuf(fd, &execbuf);
+ gem_read(fd, obj.handle, 1024, &value, sizeof(value));
+ gem_close(fd, obj.handle);
+
+ igt_assert_eq_u64(value, obj.offset);
+ igt_assert_eq_u64(relocs->presumed_offset, obj.offset);
+ munmap(relocs, 4096);
+}
+
+static void check_bo(int fd, uint32_t handle)
+{
+ uint32_t *map;
+ int i;
+
+ igt_debug("Verifying result\n");
+ map = gem_mmap__cpu(fd, handle, 0, 4096, PROT_READ);
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, 0);
+ for (i = 0; i < 1024; i++)
+ igt_assert_eq(map[i], i);
+ munmap(map, 4096);
+}
+
+static void active(int fd, unsigned engine)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ unsigned engines[16];
+ unsigned nengine;
+ int pass;
+
+ nengine = 0;
+ if (engine == ALL_ENGINES) {
+ for_each_physical_engine(fd, engine) {
+ if (gem_can_store_dword(fd, engine))
+ engines[nengine++] = engine;
+ }
+ } else {
+ igt_require(gem_has_ring(fd, engine));
+ igt_require(gem_can_store_dword(fd, engine));
+ engines[nengine++] = engine;
+ }
+ igt_require(nengine);
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = gem_create(fd, 4096);
+ obj[1].handle = gem_create(fd, 64*1024);
+ obj[1].relocs_ptr = to_user_pointer(&reloc);
+ obj[1].relocation_count = 1;
+
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.offset = sizeof(uint32_t);
+ reloc.target_handle = obj[0].handle;
+ if (gen < 8 && gen >= 4)
+ reloc.offset += sizeof(uint32_t);
+ reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ for (pass = 0; pass < 1024; pass++) {
+ uint32_t batch[16];
+ int i = 0;
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else {
+ batch[i]--;
+ batch[++i] = 0;
+ }
+ batch[++i] = pass;
+ batch[++i] = MI_BATCH_BUFFER_END;
+ gem_write(fd, obj[1].handle, pass*sizeof(batch),
+ batch, sizeof(batch));
+ }
+
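+ /* Execute each 64-byte batch packed into obj[1] in turn, retargeting
+ * the relocation onto a fresh dword of the scratch while earlier
+ * submissions may still be running on a randomly chosen engine.
+ */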
+ for (pass = 0; pass < 1024; pass++) {
+ reloc.delta = 4*pass;
+ reloc.presumed_offset = -1;
+ execbuf.flags &= ~ENGINE_MASK;
+ execbuf.flags |= engines[rand() % nengine];
+ gem_execbuf(fd, &execbuf);
+ execbuf.batch_start_offset += 64;
+ reloc.offset += 64;
+ }
+ gem_close(fd, obj[1].handle);
+
+ check_bo(fd, obj[0].handle);
+ gem_close(fd, obj[0].handle);
+}
+
+static bool has_64b_reloc(int fd)
+{
+ return intel_gen(intel_get_drm_devid(fd)) >= 8;
+}
+
+#define NORELOC 1
+#define ACTIVE 2
+#define HANG 4
+static void basic_reloc(int fd, unsigned before, unsigned after, unsigned flags)
+{
+#define OBJSZ 8192
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint64_t address_mask = has_64b_reloc(fd) ? ~(uint64_t)0 : ~(uint32_t)0;
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ unsigned int reloc_offset;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, OBJSZ);
+ obj.relocs_ptr = to_user_pointer(&reloc);
+ obj.relocation_count = 1;
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ if (flags & NORELOC)
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+
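+ /* Walk the relocation location across the object's 4KiB page boundary
+ * to exercise patching just before, straddling and just after the
+ * boundary.
+ */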
+ for (reloc_offset = 4096 - 8; reloc_offset <= 4096 + 8; reloc_offset += 4) {
+ igt_spin_t *spin = NULL;
+ uint32_t trash = 0;
+ uint64_t offset;
+
+ obj.offset = -1;
+
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.offset = reloc_offset;
+ reloc.target_handle = obj.handle;
+ reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.presumed_offset = -1;
+
+ if (before) {
+ char *wc;
+
+ if (before == I915_GEM_DOMAIN_CPU)
+ wc = gem_mmap__cpu(fd, obj.handle, 0, OBJSZ, PROT_WRITE);
+ else if (before == I915_GEM_DOMAIN_GTT)
+ wc = gem_mmap__gtt(fd, obj.handle, OBJSZ, PROT_WRITE);
+ else if (before == I915_GEM_DOMAIN_WC)
+ wc = gem_mmap__wc(fd, obj.handle, 0, OBJSZ, PROT_WRITE);
+ else
+ igt_assert(0);
+ gem_set_domain(fd, obj.handle, before, before);
+ offset = -1;
+ memcpy(wc + reloc_offset, &offset, sizeof(offset));
+ munmap(wc, OBJSZ);
+ } else {
+ offset = -1;
+ gem_write(fd, obj.handle, reloc_offset, &offset, sizeof(offset));
+ }
+
+ if (flags & ACTIVE) {
+ spin = igt_spin_batch_new(fd,
+ .engine = I915_EXEC_DEFAULT,
+ .dependency = obj.handle);
+ if (!(flags & HANG))
+ igt_spin_batch_set_timeout(spin, NSEC_PER_SEC/100);
+ igt_assert(gem_bo_busy(fd, obj.handle));
+ }
+
+ gem_execbuf(fd, &execbuf);
+
+ if (after) {
+ char *wc;
+
+ if (after == I915_GEM_DOMAIN_CPU)
+ wc = gem_mmap__cpu(fd, obj.handle, 0, OBJSZ, PROT_READ);
+ else if (after == I915_GEM_DOMAIN_GTT)
+ wc = gem_mmap__gtt(fd, obj.handle, OBJSZ, PROT_READ);
+ else if (after == I915_GEM_DOMAIN_WC)
+ wc = gem_mmap__wc(fd, obj.handle, 0, OBJSZ, PROT_READ);
+ else
+ igt_assert(0);
+ gem_set_domain(fd, obj.handle, after, 0);
+ offset = ~reloc.presumed_offset & address_mask;
+ memcpy(&offset, wc + reloc_offset, has_64b_reloc(fd) ? 8 : 4);
+ munmap(wc, OBJSZ);
+ } else {
+ offset = ~reloc.presumed_offset & address_mask;
+ gem_read(fd, obj.handle, reloc_offset, &offset, has_64b_reloc(fd) ? 8 : 4);
+ }
+
+ if (reloc.presumed_offset == -1)
+ igt_warn("reloc.presumed_offset == -1\n");
+ else
+ igt_assert_eq_u64(reloc.presumed_offset, offset);
+ igt_assert_eq_u64(obj.offset, offset);
+
+ igt_spin_batch_free(fd, spin);
+
+ /* Simulate relocation */
+ if (flags & NORELOC) {
+ obj.offset += OBJSZ;
+ reloc.presumed_offset += OBJSZ;
+ } else {
+ trash = obj.handle;
+ obj.handle = gem_create(fd, OBJSZ);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+ reloc.target_handle = obj.handle;
+ }
+
+ if (before) {
+ char *wc;
+
+ if (before == I915_GEM_DOMAIN_CPU)
+ wc = gem_mmap__cpu(fd, obj.handle, 0, OBJSZ, PROT_WRITE);
+ else if (before == I915_GEM_DOMAIN_GTT)
+ wc = gem_mmap__gtt(fd, obj.handle, OBJSZ, PROT_WRITE);
+ else if (before == I915_GEM_DOMAIN_WC)
+ wc = gem_mmap__wc(fd, obj.handle, 0, OBJSZ, PROT_WRITE);
+ else
+ igt_assert(0);
+ gem_set_domain(fd, obj.handle, before, before);
+ memcpy(wc + reloc_offset, &reloc.presumed_offset, sizeof(reloc.presumed_offset));
+ munmap(wc, OBJSZ);
+ } else {
+ gem_write(fd, obj.handle, reloc_offset, &reloc.presumed_offset, sizeof(reloc.presumed_offset));
+ }
+
+ if (flags & ACTIVE) {
+ spin = igt_spin_batch_new(fd,
+ .engine = I915_EXEC_DEFAULT,
+ .dependency = obj.handle);
+ if (!(flags & HANG))
+ igt_spin_batch_set_timeout(spin, NSEC_PER_SEC/100);
+ igt_assert(gem_bo_busy(fd, obj.handle));
+ }
+
+ gem_execbuf(fd, &execbuf);
+
+ if (after) {
+ char *wc;
+
+ if (after == I915_GEM_DOMAIN_CPU)
+ wc = gem_mmap__cpu(fd, obj.handle, 0, OBJSZ, PROT_READ);
+ else if (after == I915_GEM_DOMAIN_GTT)
+ wc = gem_mmap__gtt(fd, obj.handle, OBJSZ, PROT_READ);
+ else if (after == I915_GEM_DOMAIN_WC)
+ wc = gem_mmap__wc(fd, obj.handle, 0, OBJSZ, PROT_READ);
+ else
+ igt_assert(0);
+ gem_set_domain(fd, obj.handle, after, 0);
+ offset = ~reloc.presumed_offset & address_mask;
+ memcpy(&offset, wc + reloc_offset, has_64b_reloc(fd) ? 8 : 4);
+ munmap(wc, OBJSZ);
+ } else {
+ offset = ~reloc.presumed_offset & address_mask;
+ gem_read(fd, obj.handle, reloc_offset, &offset, has_64b_reloc(fd) ? 8 : 4);
+ }
+
+ if (reloc.presumed_offset == -1)
+ igt_warn("reloc.presumed_offset == -1\n");
+ else
+ igt_assert_eq_u64(reloc.presumed_offset, offset);
+ igt_assert_eq_u64(obj.offset, offset);
+
+ igt_spin_batch_free(fd, spin);
+ if (trash)
+ gem_close(fd, trash);
+ }
+
+ gem_close(fd, obj.handle);
+}
+
+static inline uint64_t sign_extend(uint64_t x, int index)
+{
+ int shift = 63 - index;
+ return (int64_t)(x << shift) >> shift;
+}
+
+static uint64_t gen8_canonical_address(uint64_t address)
+{
+ return sign_extend(address, 47);
+}
+
+static void basic_range(int fd, unsigned flags)
+{
+ struct drm_i915_gem_relocation_entry reloc[128];
+ struct drm_i915_gem_exec_object2 obj[128];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint64_t address_mask = has_64b_reloc(fd) ? ~(uint64_t)0 : ~(uint32_t)0;
+ uint64_t gtt_size = gem_aperture_size(fd);
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ igt_spin_t *spin = NULL;
+ int count, n;
+
+ igt_require(gem_has_softpin(fd));
+
+ for (count = 12; gtt_size >> (count + 1); count++)
+ ;
+
+ count -= 12;
+
+ memset(obj, 0, sizeof(obj));
+ memset(reloc, 0, sizeof(reloc));
+ memset(&execbuf, 0, sizeof(execbuf));
+
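+ /* Try to pin a target just below and exactly at each power-of-two
+ * address up to the aperture size (skipping placements the kernel
+ * rejects), then emit a single batch carrying a relocation to every
+ * surviving target.
+ */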
+ n = 0;
+ for (int i = 0; i <= count; i++) {
+ obj[n].handle = gem_create(fd, 4096);
+ obj[n].offset = (1ull << (i + 12)) - 4096;
+ obj[n].offset = gen8_canonical_address(obj[n].offset);
+ obj[n].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+ gem_write(fd, obj[n].handle, 0, &bbe, sizeof(bbe));
+ execbuf.buffers_ptr = to_user_pointer(&obj[n]);
+ execbuf.buffer_count = 1;
+ if (__gem_execbuf(fd, &execbuf))
+ continue;
+
+ igt_debug("obj[%d] handle=%d, address=%llx\n",
+ n, obj[n].handle, (long long)obj[n].offset);
+
+ reloc[n].offset = 8 * (n + 1);
+ reloc[n].target_handle = obj[n].handle;
+ reloc[n].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[n].presumed_offset = -1;
+ n++;
+ }
+ for (int i = 1; i < count; i++) {
+ obj[n].handle = gem_create(fd, 4096);
+ obj[n].offset = 1ull << (i + 12);
+ obj[n].offset = gen8_canonical_address(obj[n].offset);
+ obj[n].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+ gem_write(fd, obj[n].handle, 0, &bbe, sizeof(bbe));
+ execbuf.buffers_ptr = to_user_pointer(&obj[n]);
+ execbuf.buffer_count = 1;
+ if (__gem_execbuf(fd, &execbuf))
+ continue;
+
+ igt_debug("obj[%d] handle=%d, address=%llx\n",
+ n, obj[n].handle, (long long)obj[n].offset);
+
+ reloc[n].offset = 8 * (n + 1);
+ reloc[n].target_handle = obj[n].handle;
+ reloc[n].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[n].presumed_offset = -1;
+ n++;
+ }
+ igt_require(n);
+
+ obj[n].handle = gem_create(fd, 4096);
+ obj[n].relocs_ptr = to_user_pointer(reloc);
+ obj[n].relocation_count = n;
+ gem_write(fd, obj[n].handle, 0, &bbe, sizeof(bbe));
+
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = n + 1;
+
+ if (flags & ACTIVE) {
+ spin = igt_spin_batch_new(fd, .dependency = obj[n].handle);
+ if (!(flags & HANG))
+ igt_spin_batch_set_timeout(spin, NSEC_PER_SEC/100);
+ igt_assert(gem_bo_busy(fd, obj[n].handle));
+ }
+
+ gem_execbuf(fd, &execbuf);
+ igt_spin_batch_free(fd, spin);
+
+ for (int i = 0; i < n; i++) {
+ uint64_t offset;
+
+ offset = ~reloc[i].presumed_offset & address_mask;
+ gem_read(fd, obj[n].handle, reloc[i].offset,
+ &offset, has_64b_reloc(fd) ? 8 : 4);
+
+ igt_debug("obj[%d] handle=%d, offset=%llx, found=%llx, presumed=%llx\n",
+ i, obj[i].handle,
+ (long long)obj[i].offset,
+ (long long)offset,
+ (long long)reloc[i].presumed_offset);
+
+ igt_assert_eq_u64(obj[i].offset, offset);
+ if (reloc[i].presumed_offset == -1)
+ igt_warn("reloc.presumed_offset == -1\n");
+ else
+ igt_assert_eq_u64(reloc[i].presumed_offset, offset);
+ }
+
+ for (int i = 0; i <= n; i++)
+ gem_close(fd, obj[i].handle);
+}
+
+static void basic_softpin(int fd)
+{
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint64_t offset;
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+
+ igt_require(gem_has_softpin(fd));
+
+ memset(obj, 0, sizeof(obj));
+ obj[1].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj[1]);
+ execbuf.buffer_count = 1;
+ gem_execbuf(fd, &execbuf);
+
+ offset = obj[1].offset;
+
+ obj[0].handle = gem_create(fd, 4096);
+ obj[0].offset = obj[1].offset;
+ obj[0].flags = EXEC_OBJECT_PINNED;
+
+ execbuf.buffers_ptr = to_user_pointer(&obj[0]);
+ execbuf.buffer_count = 2;
+
+ gem_execbuf(fd, &execbuf);
+ igt_assert_eq_u64(obj[0].offset, offset);
+
+ gem_close(fd, obj[0].handle);
+ gem_close(fd, obj[1].handle);
+}
+
+igt_main
+{
+ const struct mode {
+ const char *name;
+ unsigned before, after;
+ } modes[] = {
+ { "cpu", I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU },
+ { "gtt", I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT },
+ { "wc", I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC },
+ { "cpu-gtt", I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_GTT },
+ { "gtt-cpu", I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_CPU },
+ { "cpu-wc", I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_WC },
+ { "wc-cpu", I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_CPU },
+ { "gtt-wc", I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_WC },
+ { "wc-gtt", I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_GTT },
+ { "cpu-read", I915_GEM_DOMAIN_CPU, 0 },
+ { "gtt-read", I915_GEM_DOMAIN_GTT, 0 },
+ { "wc-read", I915_GEM_DOMAIN_WC, 0 },
+ { "write-cpu", 0, I915_GEM_DOMAIN_CPU },
+ { "write-gtt", 0, I915_GEM_DOMAIN_GTT },
+ { "write-wc", 0, I915_GEM_DOMAIN_WC },
+ { "write-read", 0, 0 },
+ { },
+ }, *m;
+ const struct flags {
+ const char *name;
+ unsigned flags;
+ bool basic;
+ } flags[] = {
+ { "", 0 , true},
+ { "-noreloc", NORELOC, true },
+ { "-active", ACTIVE, true },
+ { "-hang", ACTIVE | HANG },
+ { },
+ }, *f;
+ uint64_t size;
+ int fd = -1;
+
+ igt_fixture {
+ fd = drm_open_driver_master(DRIVER_INTEL);
+ igt_require_gem(fd);
+ }
+
+ for (f = flags; f->name; f++) {
+ igt_hang_t hang;
+
+ igt_subtest_group {
+ igt_fixture {
+ if (f->flags & HANG)
+ hang = igt_allow_hang(fd, 0, 0);
+ }
+
+ for (m = modes; m->name; m++) {
+ igt_subtest_f("%s%s%s",
+ f->basic ? "basic-" : "",
+ m->name,
+ f->name) {
+ if ((m->before | m->after) & I915_GEM_DOMAIN_WC)
+ igt_require(gem_mmap__has_wc(fd));
+ basic_reloc(fd, m->before, m->after, f->flags);
+ }
+ }
+
+ if (!(f->flags & NORELOC)) {
+ igt_subtest_f("%srange%s",
+ f->basic ? "basic-" : "", f->name)
+ basic_range(fd, f->flags);
+ }
+
+ igt_fixture {
+ if (f->flags & HANG)
+ igt_disallow_hang(fd, hang);
+ }
+ }
+ }
+
+ igt_subtest("basic-softpin")
+ basic_softpin(fd);
+
+ for (size = 4096; size <= 4ull*1024*1024*1024; size <<= 1) {
+ igt_subtest_f("mmap-%u", find_last_set(size) - 1)
+ from_mmap(fd, size, MEM);
+ igt_subtest_f("readonly-%u", find_last_set(size) - 1)
+ from_mmap(fd, size, MEM | RO);
+ igt_subtest_f("cpu-%u", find_last_set(size) - 1)
+ from_mmap(fd, size, CPU);
+ igt_subtest_f("wc-%u", find_last_set(size) - 1) {
+ igt_require(gem_mmap__has_wc(fd));
+ from_mmap(fd, size, WC);
+ }
+ igt_subtest_f("gtt-%u", find_last_set(size) - 1)
+ from_mmap(fd, size, GTT);
+ }
+
+ igt_subtest("gpu")
+ from_gpu(fd);
+
+ igt_subtest("active")
+ active(fd, ALL_ENGINES);
+ for (const struct intel_execution_engine *e = intel_execution_engines;
+ e->name; e++) {
+ igt_subtest_f("active-%s", e->name)
+ active(fd, e->exec_id | e->flags);
+ }
+ igt_fixture
+ close(fd);
+}
diff --git a/tests/i915/gem_exec_reuse.c b/tests/i915/gem_exec_reuse.c
new file mode 100644
index 00000000..df220be7
--- /dev/null
+++ b/tests/i915/gem_exec_reuse.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <limits.h>
+#include <sys/resource.h>
+
+#include "igt.h"
+
+IGT_TEST_DESCRIPTION("Inspect scaling with large number of reused objects");
+
+struct noop {
+ struct drm_i915_gem_exec_object2 *obj;
+ uint32_t batch;
+ uint32_t *handles;
+ unsigned int nhandles;
+ unsigned int max_age;
+ int fd;
+};
+
+static void noop(struct noop *n,
+ unsigned ring, unsigned ctx,
+ unsigned int count, unsigned int offset)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ unsigned int i;
+
+ for (i = 0; i < count; i++)
+ n->obj[i].handle = n->handles[(i + offset) & (n->nhandles-1)];
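+ /* i == count after the loop, so the batch is always listed last */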
+ n->obj[i].handle = n->batch;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(n->obj);
+ execbuf.buffer_count = count + 1;
+ execbuf.flags = ring | 1 << 12; /* I915_EXEC_HANDLE_LUT */
+ execbuf.rsvd1 = ctx;
+ gem_execbuf(n->fd, &execbuf);
+}
+
+static bool allow_unlimited_files(void)
+{
+ struct rlimit rlim;
+ unsigned nofile_rlim = 1024*1024;
+
+ FILE *file = fopen("/proc/sys/fs/file-max", "r");
+ if (file) {
+ igt_assert(fscanf(file, "%u", &nofile_rlim) == 1);
+ fclose(file);
+ }
+
+ if (getrlimit(RLIMIT_NOFILE, &rlim))
+ return false;
+
+ rlim.rlim_cur = rlim.rlim_max;
+ if (setrlimit(RLIMIT_NOFILE, &rlim))
+ return false;
+
+ rlim.rlim_cur = nofile_rlim;
+ rlim.rlim_max = nofile_rlim;
+ return setrlimit(RLIMIT_NOFILE, &rlim) == 0;
+}
+
+static uint64_t vfs_file_max(void)
+{
+ long long unsigned max = 80000;
+ FILE *file = fopen("/proc/sys/fs/file-max", "r");
+ if (file) {
+ igt_assert(fscanf(file, "%llu", &max) == 1);
+ fclose(file);
+ }
+
+ igt_info("System limit for open files is %llu\n", max);
+ return max;
+}
+
+static uint64_t max_open_files(void)
+{
+ struct rlimit rlim;
+
+ if (getrlimit(RLIMIT_NOFILE, &rlim))
+ rlim.rlim_cur = 64 << 10;
+
+ igt_info("Process limit for file descriptors is %lu\n",
+ (unsigned long)rlim.rlim_cur);
+ return rlim.rlim_cur;
+}
+
+static unsigned int max_nfd(void)
+{
+ uint64_t vfs = vfs_file_max();
+ uint64_t fd = max_open_files();
+ uint64_t min = fd < vfs ? fd : vfs;
+ if (min > INT_MAX)
+ min = INT_MAX;
+ return min;
+}
+
+igt_main
+{
+ struct noop no;
+ unsigned engines[16];
+ unsigned nengine;
+ unsigned n;
+
+ igt_fixture {
+ uint64_t gtt_size, max;
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+ unsigned engine;
+
+ allow_unlimited_files();
+
+ no.fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(no.fd);
+
+ igt_fork_hang_detector(no.fd);
+
+ gtt_size = (gem_aperture_size(no.fd) / 2) >> 12;
+ if (gtt_size > INT_MAX / sizeof(*no.handles))
+ gtt_size = INT_MAX / sizeof(*no.handles);
+
+ max = max_nfd() - 16;
+ if (max < gtt_size)
+ gtt_size = max;
+
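+ /* Round the handle count down to a power of two so noop() can wrap
+ * its index with a simple mask.
+ */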
+ no.nhandles = 1 << (igt_fls(gtt_size) - 1);
+ intel_require_memory(no.nhandles, 4096, CHECK_RAM);
+
+ no.max_age = no.nhandles / 2;
+
+ no.handles = malloc(sizeof(*no.handles) * no.nhandles);
+ for (n = 0; n < no.nhandles; n++)
+ no.handles[n] = gem_create(no.fd, 4096);
+
+ no.obj = malloc(sizeof(struct drm_i915_gem_exec_object2) * (no.max_age + 1));
+
+ nengine = 0;
+ for_each_engine(no.fd, engine)
+ if (engine)
+ engines[nengine++] = engine;
+ igt_require(nengine);
+
+ no.batch = gem_create(no.fd, 4096);
+ gem_write(no.fd, no.batch, 0, &bbe, sizeof(bbe));
+ }
+
+ igt_subtest_f("single") {
+ unsigned int timeout = 5;
+ unsigned long age = 0;
+
+ igt_until_timeout(timeout)
+ for (n = 0; n < nengine; n++)
+ noop(&no, engines[n], 0, 0, age++);
+ gem_sync(no.fd, no.batch);
+ igt_info("Completed %lu cycles\n", age);
+ }
+
+ igt_subtest_f("baggage") {
+ unsigned int timeout = 5;
+ unsigned long age = 0;
+
+ igt_until_timeout(timeout)
+ for (n = 0; n < nengine; n++)
+ noop(&no, engines[n], 0,
+ no.max_age, age++);
+ gem_sync(no.fd, no.batch);
+ igt_info("Completed %lu cycles\n", age);
+ }
+
+ igt_subtest_f("contexts") {
+ unsigned int timeout = 5;
+ unsigned long ctx_age = 0;
+ unsigned long obj_age = 0;
+ const unsigned int ncontexts = 1024;
+ uint32_t contexts[ncontexts];
+
+ gem_require_contexts(no.fd);
+
+ for (n = 0; n < ncontexts; n++)
+ contexts[n] = gem_context_create(no.fd);
+
+ igt_until_timeout(timeout) {
+ for (n = 0; n < nengine; n++) {
+ noop(&no, engines[n],
+ contexts[ctx_age % ncontexts],
+ no.max_age, obj_age);
+ obj_age++;
+ }
+ ctx_age++;
+ }
+ gem_sync(no.fd, no.batch);
+ igt_info("Completed %lu cycles across %lu context switches\n",
+ obj_age, ctx_age);
+
+ for (n = 0; n < ncontexts; n++)
+ gem_context_destroy(no.fd, contexts[n]);
+ }
+}
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
new file mode 100644
index 00000000..0462ce84
--- /dev/null
+++ b/tests/i915/gem_exec_schedule.c
@@ -0,0 +1,1358 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "config.h"
+
+#include <sys/poll.h>
+#include <sys/ioctl.h>
+#include <sched.h>
+#include <signal.h>
+
+#include "igt.h"
+#include "igt_vgem.h"
+#include "igt_rand.h"
+#include "igt_sysfs.h"
+#include "i915/gem_ring.h"
+
+#define LO 0
+#define HI 1
+#define NOISE 2
+
+#define MAX_PRIO LOCAL_I915_CONTEXT_MAX_USER_PRIORITY
+#define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY
+
+#define MAX_ELSP_QLEN 16
+
+#define MAX_ENGINES 16
+
+#define MAX_CONTEXTS 1024
+
+IGT_TEST_DESCRIPTION("Check that we can control the order of execution");
+
+static uint32_t __store_dword(int fd, uint32_t ctx, unsigned ring,
+ uint32_t target, uint32_t offset, uint32_t value,
+ uint32_t cork, unsigned write_domain)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t batch[16];
+ int i;
+
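+ /* Emit a batch that stores a single dword into the target bo; when a
+ * cork handle is given it is listed first so the request is held back
+ * until the cork is unplugged.
+ */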
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj + !cork);
+ execbuf.buffer_count = 2 + !!cork;
+ execbuf.flags = ring;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+ execbuf.rsvd1 = ctx;
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = cork;
+ obj[1].handle = target;
+ obj[2].handle = gem_create(fd, 4096);
+
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.target_handle = obj[1].handle;
+ reloc.presumed_offset = 0;
+ reloc.offset = sizeof(uint32_t);
+ reloc.delta = offset;
+ reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.write_domain = write_domain;
+ obj[2].relocs_ptr = to_user_pointer(&reloc);
+ obj[2].relocation_count = 1;
+
+ i = 0;
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = offset;
+ batch[++i] = 0;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = offset;
+ reloc.offset += sizeof(uint32_t);
+ } else {
+ batch[i]--;
+ batch[++i] = offset;
+ }
+ batch[++i] = value;
+ batch[++i] = MI_BATCH_BUFFER_END;
+ gem_write(fd, obj[2].handle, 0, batch, sizeof(batch));
+ gem_execbuf(fd, &execbuf);
+
+ return obj[2].handle;
+}
+
+static void store_dword(int fd, uint32_t ctx, unsigned ring,
+ uint32_t target, uint32_t offset, uint32_t value,
+ uint32_t cork, unsigned write_domain)
+{
+ gem_close(fd, __store_dword(fd, ctx, ring,
+ target, offset, value,
+ cork, write_domain));
+}
+
+static uint32_t create_highest_priority(int fd)
+{
+ uint32_t ctx = gem_context_create(fd);
+
+ /*
+ * If there is no priority support, all contexts will have equal
+ * priority (and therefore the max user priority), so no context
+ * can overtake us, and we effectively can form a plug.
+ */
+ __gem_context_set_priority(fd, ctx, MAX_PRIO);
+
+ return ctx;
+}
+
+static void unplug_show_queue(int fd, struct igt_cork *c, unsigned int engine)
+{
+ igt_spin_t *spin[MAX_ELSP_QLEN];
+
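+ /* Stack up a queue of maximum-priority spinners so that, once the
+ * cork is pulled, the released requests stay queued behind them long
+ * enough to appear in the debugfs dump.
+ */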
+ for (int n = 0; n < ARRAY_SIZE(spin); n++) {
+ const struct igt_spin_factory opts = {
+ .ctx = create_highest_priority(fd),
+ .engine = engine,
+ };
+ spin[n] = __igt_spin_batch_factory(fd, &opts);
+ gem_context_destroy(fd, opts.ctx);
+ }
+
+ igt_cork_unplug(c); /* batches will now be queued on the engine */
+ igt_debugfs_dump(fd, "i915_engine_info");
+
+ for (int n = 0; n < ARRAY_SIZE(spin); n++)
+ igt_spin_batch_free(fd, spin[n]);
+}
+
+static void fifo(int fd, unsigned ring)
+{
+ IGT_CORK_HANDLE(cork);
+ uint32_t scratch, plug;
+ uint32_t *ptr;
+
+ scratch = gem_create(fd, 4096);
+
+ plug = igt_cork_plug(&cork, fd);
+
+ /* Same priority, same timeline, final result will be the second eb */
+ store_dword(fd, 0, ring, scratch, 0, 1, plug, 0);
+ store_dword(fd, 0, ring, scratch, 0, 2, plug, 0);
+
+ unplug_show_queue(fd, &cork, ring);
+ gem_close(fd, plug);
+
+ ptr = gem_mmap__gtt(fd, scratch, 4096, PROT_READ);
+ gem_set_domain(fd, scratch, /* no write hazard lies! */
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ gem_close(fd, scratch);
+
+ igt_assert_eq_u32(ptr[0], 2);
+ munmap(ptr, 4096);
+}
+
+static void independent(int fd, unsigned int engine)
+{
+ IGT_CORK_HANDLE(cork);
+ uint32_t scratch, plug, batch;
+ igt_spin_t *spin = NULL;
+ unsigned int other;
+ uint32_t *ptr;
+
+ igt_require(engine != 0);
+
+ scratch = gem_create(fd, 4096);
+ ptr = gem_mmap__gtt(fd, scratch, 4096, PROT_READ);
+ igt_assert_eq(ptr[0], 0);
+
+ plug = igt_cork_plug(&cork, fd);
+
+ /* Check that we can submit to engine while all others are blocked */
+ for_each_physical_engine(fd, other) {
+ if (other == engine)
+ continue;
+
+ if (!gem_can_store_dword(fd, other))
+ continue;
+
+ if (spin == NULL) {
+ spin = __igt_spin_batch_new(fd, .engine = other);
+ } else {
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = spin->handle,
+ };
+ struct drm_i915_gem_execbuffer2 eb = {
+ .buffer_count = 1,
+ .buffers_ptr = to_user_pointer(&obj),
+ .flags = other,
+ };
+ gem_execbuf(fd, &eb);
+ }
+
+ store_dword(fd, 0, other, scratch, 0, other, plug, 0);
+ }
+ igt_require(spin);
+
+ /* Same priority, but different timeline (as different engine) */
+ batch = __store_dword(fd, 0, engine, scratch, 0, engine, plug, 0);
+
+ unplug_show_queue(fd, &cork, engine);
+ gem_close(fd, plug);
+
+ gem_sync(fd, batch);
+ igt_assert(!gem_bo_busy(fd, batch));
+ igt_assert(gem_bo_busy(fd, spin->handle));
+ gem_close(fd, batch);
+
+ /* Only the local engine should be free to complete. */
+ igt_assert(gem_bo_busy(fd, scratch));
+ igt_assert_eq(ptr[0], engine);
+
+ igt_spin_batch_free(fd, spin);
+ gem_quiescent_gpu(fd);
+
+ /* And we expect the others to have overwritten us, order unspecified */
+ igt_assert(!gem_bo_busy(fd, scratch));
+ igt_assert_neq(ptr[0], engine);
+
+ munmap(ptr, 4096);
+ gem_close(fd, scratch);
+}
+
+static void smoketest(int fd, unsigned ring, unsigned timeout)
+{
+ const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ unsigned engines[MAX_ENGINES];
+ unsigned nengine;
+ unsigned engine;
+ uint32_t scratch;
+ uint32_t *ptr;
+
+ nengine = 0;
+ if (ring == ALL_ENGINES) {
+ for_each_physical_engine(fd, engine)
+ engines[nengine++] = engine;
+ } else {
+ engines[nengine++] = ring;
+ }
+ igt_require(nengine);
+
+ scratch = gem_create(fd, 4096);
+ igt_fork(child, ncpus) {
+ unsigned long count = 0;
+ uint32_t ctx;
+
+ hars_petruska_f54_1_random_perturb(child);
+
+ ctx = gem_context_create(fd);
+ igt_until_timeout(timeout) {
+ int prio;
+
+ prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
+ gem_context_set_priority(fd, ctx, prio);
+
+ engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
+ store_dword(fd, ctx, engine, scratch,
+ 8*child + 0, ~child,
+ 0, 0);
+ for (unsigned int step = 0; step < 8; step++)
+ store_dword(fd, ctx, engine, scratch,
+ 8*child + 4, count++,
+ 0, 0);
+ }
+ gem_context_destroy(fd, ctx);
+ }
+ igt_waitchildren();
+
+ ptr = gem_mmap__gtt(fd, scratch, 4096, PROT_READ);
+ gem_set_domain(fd, scratch, /* no write hazard lies! */
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ gem_close(fd, scratch);
+
+ for (unsigned n = 0; n < ncpus; n++) {
+ igt_assert_eq_u32(ptr[2*n], ~n);
+ /*
+ * Note this count is approximate due to unconstrained
+ * ordering of the dword writes between engines.
+ *
+ * Take the result with a pinch of salt.
+ */
+ igt_info("Child[%d] completed %u cycles\n", n, ptr[2*n+1]);
+ }
+ munmap(ptr, 4096);
+}
+
+static void reorder(int fd, unsigned ring, unsigned flags)
+#define EQUAL 1
+{
+ IGT_CORK_HANDLE(cork);
+ uint32_t scratch, plug;
+ uint32_t *ptr;
+ uint32_t ctx[2];
+
+ ctx[LO] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[LO], MIN_PRIO);
+
+ ctx[HI] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
+
+ scratch = gem_create(fd, 4096);
+ plug = igt_cork_plug(&cork, fd);
+
+ /* We expect the high priority context to be executed first, and
+ * so the final result will be the value from the low priority context.
+ */
+ store_dword(fd, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
+ store_dword(fd, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
+
+ unplug_show_queue(fd, &cork, ring);
+ gem_close(fd, plug);
+
+ gem_context_destroy(fd, ctx[LO]);
+ gem_context_destroy(fd, ctx[HI]);
+
+ ptr = gem_mmap__gtt(fd, scratch, 4096, PROT_READ);
+ gem_set_domain(fd, scratch, /* no write hazard lies! */
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ gem_close(fd, scratch);
+
+ if (flags & EQUAL) /* equal priority, result will be fifo */
+ igt_assert_eq_u32(ptr[0], ctx[HI]);
+ else
+ igt_assert_eq_u32(ptr[0], ctx[LO]);
+ munmap(ptr, 4096);
+}
+
+static void promotion(int fd, unsigned ring)
+{
+ IGT_CORK_HANDLE(cork);
+ uint32_t result, dep;
+ uint32_t *ptr;
+ uint32_t ctx[3];
+ uint32_t plug;
+
+ ctx[LO] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[LO], MIN_PRIO);
+
+ ctx[HI] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[HI], 0);
+
+ ctx[NOISE] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[NOISE], MIN_PRIO/2);
+
+ result = gem_create(fd, 4096);
+ dep = gem_create(fd, 4096);
+
+ plug = igt_cork_plug(&cork, fd);
+
+ /* Expect that HI promotes LO, so the order will be LO, HI, NOISE.
+ *
+ * fifo would be NOISE, LO, HI.
+ * strict priority would be HI, NOISE, LO
+ */
+ store_dword(fd, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
+ store_dword(fd, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
+
+ /* link LO <-> HI via a dependency on another buffer */
+ store_dword(fd, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
+ store_dword(fd, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
+
+ store_dword(fd, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
+
+ unplug_show_queue(fd, &cork, ring);
+ gem_close(fd, plug);
+
+ gem_context_destroy(fd, ctx[NOISE]);
+ gem_context_destroy(fd, ctx[LO]);
+ gem_context_destroy(fd, ctx[HI]);
+
+ ptr = gem_mmap__gtt(fd, dep, 4096, PROT_READ);
+ gem_set_domain(fd, dep, /* no write hazard lies! */
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ gem_close(fd, dep);
+
+ igt_assert_eq_u32(ptr[0], ctx[HI]);
+ munmap(ptr, 4096);
+
+ ptr = gem_mmap__gtt(fd, result, 4096, PROT_READ);
+ gem_set_domain(fd, result, /* no write hazard lies! */
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ gem_close(fd, result);
+
+ igt_assert_eq_u32(ptr[0], ctx[NOISE]);
+ munmap(ptr, 4096);
+}
+
+#define NEW_CTX (0x1 << 0)
+#define HANG_LP (0x1 << 1)
+static void preempt(int fd, unsigned ring, unsigned flags)
+{
+ uint32_t result = gem_create(fd, 4096);
+ uint32_t *ptr = gem_mmap__gtt(fd, result, 4096, PROT_READ);
+ igt_spin_t *spin[MAX_ELSP_QLEN];
+ uint32_t ctx[2];
+ igt_hang_t hang;
+
+ ctx[LO] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[LO], MIN_PRIO);
+
+ ctx[HI] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[HI], MAX_PRIO);
+
+ if (flags & HANG_LP)
+ hang = igt_hang_ctx(fd, ctx[LO], ring, 0);
+
+ for (int n = 0; n < ARRAY_SIZE(spin); n++) {
+ if (flags & NEW_CTX) {
+ gem_context_destroy(fd, ctx[LO]);
+ ctx[LO] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[LO], MIN_PRIO);
+ }
+ spin[n] = __igt_spin_batch_new(fd,
+ .ctx = ctx[LO],
+ .engine = ring);
+ igt_debug("spin[%d].handle=%d\n", n, spin[n]->handle);
+
+ store_dword(fd, ctx[HI], ring, result, 0, n + 1, 0, I915_GEM_DOMAIN_RENDER);
+
+ gem_set_domain(fd, result, I915_GEM_DOMAIN_GTT, 0);
+ igt_assert_eq_u32(ptr[0], n + 1);
+ igt_assert(gem_bo_busy(fd, spin[0]->handle));
+ }
+
+ for (int n = 0; n < ARRAY_SIZE(spin); n++)
+ igt_spin_batch_free(fd, spin[n]);
+
+ if (flags & HANG_LP)
+ igt_post_hang_ring(fd, hang);
+
+ gem_context_destroy(fd, ctx[LO]);
+ gem_context_destroy(fd, ctx[HI]);
+
+ munmap(ptr, 4096);
+ gem_close(fd, result);
+}
+
+#define CHAIN 0x1
+#define CONTEXTS 0x2
+
+static igt_spin_t *__noise(int fd, uint32_t ctx, int prio, igt_spin_t *spin)
+{
+ unsigned other;
+
+ gem_context_set_priority(fd, ctx, prio);
+
+ for_each_physical_engine(fd, other) {
+ if (spin == NULL) {
+ spin = __igt_spin_batch_new(fd,
+ .ctx = ctx,
+ .engine = other);
+ } else {
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = spin->handle,
+ };
+ struct drm_i915_gem_execbuffer2 eb = {
+ .buffer_count = 1,
+ .buffers_ptr = to_user_pointer(&obj),
+ .rsvd1 = ctx,
+ .flags = other,
+ };
+ gem_execbuf(fd, &eb);
+ }
+ }
+
+ return spin;
+}
+
+static void __preempt_other(int fd,
+ uint32_t *ctx,
+ unsigned int target, unsigned int primary,
+ unsigned flags)
+{
+ uint32_t result = gem_create(fd, 4096);
+ uint32_t *ptr = gem_mmap__gtt(fd, result, 4096, PROT_READ);
+ unsigned int n, i, other;
+
+ n = 0;
+ store_dword(fd, ctx[LO], primary,
+ result, (n + 1)*sizeof(uint32_t), n + 1,
+ 0, I915_GEM_DOMAIN_RENDER);
+ n++;
+
+ if (flags & CHAIN) {
+ for_each_physical_engine(fd, other) {
+ store_dword(fd, ctx[LO], other,
+ result, (n + 1)*sizeof(uint32_t), n + 1,
+ 0, I915_GEM_DOMAIN_RENDER);
+ n++;
+ }
+ }
+
+ store_dword(fd, ctx[HI], target,
+ result, (n + 1)*sizeof(uint32_t), n + 1,
+ 0, I915_GEM_DOMAIN_RENDER);
+
+ igt_debugfs_dump(fd, "i915_engine_info");
+ gem_set_domain(fd, result, I915_GEM_DOMAIN_GTT, 0);
+
+ n++;
+ for (i = 0; i <= n; i++)
+ igt_assert_eq_u32(ptr[i], i);
+
+ munmap(ptr, 4096);
+ gem_close(fd, result);
+}
+
+static void preempt_other(int fd, unsigned ring, unsigned int flags)
+{
+ unsigned int primary;
+ igt_spin_t *spin = NULL;
+ uint32_t ctx[3];
+
+ /* On each engine, insert
+ * [NOISE] spinner,
+ * [LOW] write
+ *
+ * Then on our target engine do a [HIGH] write which should then
+ * promote its dependent LOW writes in front of the spinner on
+ * each engine. The purpose of this test is to check that preemption
+ * can cross engines.
+ */
+
+ ctx[LO] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[LO], MIN_PRIO);
+
+ ctx[NOISE] = gem_context_create(fd);
+ spin = __noise(fd, ctx[NOISE], 0, NULL);
+
+ ctx[HI] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[HI], MAX_PRIO);
+
+ for_each_physical_engine(fd, primary) {
+ igt_debug("Primary engine: %s\n", e__->name);
+ __preempt_other(fd, ctx, ring, primary, flags);
+
+ }
+
+ igt_assert(gem_bo_busy(fd, spin->handle));
+ igt_spin_batch_free(fd, spin);
+
+ gem_context_destroy(fd, ctx[LO]);
+ gem_context_destroy(fd, ctx[NOISE]);
+ gem_context_destroy(fd, ctx[HI]);
+}
+
+static void __preempt_queue(int fd,
+ unsigned target, unsigned primary,
+ unsigned depth, unsigned flags)
+{
+ uint32_t result = gem_create(fd, 4096);
+ uint32_t *ptr = gem_mmap__gtt(fd, result, 4096, PROT_READ);
+ igt_spin_t *above = NULL, *below = NULL;
+ unsigned int other, n, i;
+ int prio = MAX_PRIO;
+ uint32_t ctx[3] = {
+ gem_context_create(fd),
+ gem_context_create(fd),
+ gem_context_create(fd),
+ };
+
+ for (n = 0; n < depth; n++) {
+ if (flags & CONTEXTS) {
+ gem_context_destroy(fd, ctx[NOISE]);
+ ctx[NOISE] = gem_context_create(fd);
+ }
+ above = __noise(fd, ctx[NOISE], prio--, above);
+ }
+
+ gem_context_set_priority(fd, ctx[HI], prio--);
+
+ for (; n < MAX_ELSP_QLEN; n++) {
+ if (flags & CONTEXTS) {
+ gem_context_destroy(fd, ctx[NOISE]);
+ ctx[NOISE] = gem_context_create(fd);
+ }
+ below = __noise(fd, ctx[NOISE], prio--, below);
+ }
+
+ gem_context_set_priority(fd, ctx[LO], prio--);
+
+ n = 0;
+ store_dword(fd, ctx[LO], primary,
+ result, (n + 1)*sizeof(uint32_t), n + 1,
+ 0, I915_GEM_DOMAIN_RENDER);
+ n++;
+
+ if (flags & CHAIN) {
+ for_each_physical_engine(fd, other) {
+ store_dword(fd, ctx[LO], other,
+ result, (n + 1)*sizeof(uint32_t), n + 1,
+ 0, I915_GEM_DOMAIN_RENDER);
+ n++;
+ }
+ }
+
+ store_dword(fd, ctx[HI], target,
+ result, (n + 1)*sizeof(uint32_t), n + 1,
+ 0, I915_GEM_DOMAIN_RENDER);
+
+ igt_debugfs_dump(fd, "i915_engine_info");
+
+ if (above) {
+ igt_assert(gem_bo_busy(fd, above->handle));
+ igt_spin_batch_free(fd, above);
+ }
+
+ gem_set_domain(fd, result, I915_GEM_DOMAIN_GTT, 0);
+
+ n++;
+ for (i = 0; i <= n; i++)
+ igt_assert_eq_u32(ptr[i], i);
+
+ if (below) {
+ igt_assert(gem_bo_busy(fd, below->handle));
+ igt_spin_batch_free(fd, below);
+ }
+
+ gem_context_destroy(fd, ctx[LO]);
+ gem_context_destroy(fd, ctx[NOISE]);
+ gem_context_destroy(fd, ctx[HI]);
+
+ munmap(ptr, 4096);
+ gem_close(fd, result);
+}
+
+static void preempt_queue(int fd, unsigned ring, unsigned int flags)
+{
+ unsigned other;
+
+ for_each_physical_engine(fd, other) {
+ for (unsigned depth = 0; depth <= MAX_ELSP_QLEN; depth++)
+ __preempt_queue(fd, ring, other, depth, flags);
+ }
+}
+
+static void preempt_self(int fd, unsigned ring)
+{
+ uint32_t result = gem_create(fd, 4096);
+ uint32_t *ptr = gem_mmap__gtt(fd, result, 4096, PROT_READ);
+ igt_spin_t *spin[MAX_ELSP_QLEN];
+ unsigned int other;
+ unsigned int n, i;
+ uint32_t ctx[3];
+
+ /* On each engine, insert
+ * [NOISE] spinner,
+ * [self/LOW] write
+ *
+ * Then on our target engine do a [self/HIGH] write which should then
+ * preempt its own lower priority task on any engine.
+ */
+
+ ctx[NOISE] = gem_context_create(fd);
+
+ ctx[HI] = gem_context_create(fd);
+
+ n = 0;
+ gem_context_set_priority(fd, ctx[HI], MIN_PRIO);
+ for_each_physical_engine(fd, other) {
+ spin[n] = __igt_spin_batch_new(fd,
+ .ctx = ctx[NOISE],
+ .engine = other);
+ store_dword(fd, ctx[HI], other,
+ result, (n + 1)*sizeof(uint32_t), n + 1,
+ 0, I915_GEM_DOMAIN_RENDER);
+ n++;
+ }
+ gem_context_set_priority(fd, ctx[HI], MAX_PRIO);
+ store_dword(fd, ctx[HI], ring,
+ result, (n + 1)*sizeof(uint32_t), n + 1,
+ 0, I915_GEM_DOMAIN_RENDER);
+
+ gem_set_domain(fd, result, I915_GEM_DOMAIN_GTT, 0);
+
+ for (i = 0; i < n; i++) {
+ igt_assert(gem_bo_busy(fd, spin[i]->handle));
+ igt_spin_batch_free(fd, spin[i]);
+ }
+
+ n++;
+ for (i = 0; i <= n; i++)
+ igt_assert_eq_u32(ptr[i], i);
+
+ gem_context_destroy(fd, ctx[NOISE]);
+ gem_context_destroy(fd, ctx[HI]);
+
+ munmap(ptr, 4096);
+ gem_close(fd, result);
+}
+
+static void preemptive_hang(int fd, unsigned ring)
+{
+ igt_spin_t *spin[MAX_ELSP_QLEN];
+ igt_hang_t hang;
+ uint32_t ctx[2];
+
+ ctx[HI] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[HI], MAX_PRIO);
+
+ for (int n = 0; n < ARRAY_SIZE(spin); n++) {
+ ctx[LO] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[LO], MIN_PRIO);
+
+ spin[n] = __igt_spin_batch_new(fd,
+ .ctx = ctx[LO],
+ .engine = ring);
+
+ gem_context_destroy(fd, ctx[LO]);
+ }
+
+ hang = igt_hang_ctx(fd, ctx[HI], ring, 0);
+ igt_post_hang_ring(fd, hang);
+
+ for (int n = 0; n < ARRAY_SIZE(spin); n++) {
+ /* Current behavior is to execute requests in order of submission.
+ * This is subject to change as the scheduler evolves. The test should
+ * be updated to reflect such changes.
+ */
+ igt_assert(gem_bo_busy(fd, spin[n]->handle));
+ igt_spin_batch_free(fd, spin[n]);
+ }
+
+ gem_context_destroy(fd, ctx[HI]);
+}
+
+static void deep(int fd, unsigned ring)
+{
+#define XS 8
+ const unsigned int max_req = MAX_PRIO - MIN_PRIO;
+ const unsigned size = ALIGN(4*max_req, 4096);
+ struct timespec tv = {};
+ IGT_CORK_HANDLE(cork);
+ unsigned int nreq;
+ uint32_t plug;
+ uint32_t result, dep[XS];
+ uint32_t expected = 0;
+ uint32_t *ptr;
+ uint32_t *ctx;
+ int dep_nreq;
+ int n;
+
+ ctx = malloc(sizeof(*ctx) * MAX_CONTEXTS);
+ for (n = 0; n < MAX_CONTEXTS; n++) {
+ ctx[n] = gem_context_create(fd);
+ }
+
+ nreq = gem_measure_ring_inflight(fd, ring, 0) / (4 * XS) * MAX_CONTEXTS;
+ if (nreq > max_req)
+ nreq = max_req;
+ igt_info("Using %d requests (prio range %d)\n", nreq, max_req);
+
+ result = gem_create(fd, size);
+ for (int m = 0; m < XS; m ++)
+ dep[m] = gem_create(fd, size);
+
+ /* Bind all surfaces and contexts before starting the timeout. */
+ {
+ struct drm_i915_gem_exec_object2 obj[XS + 2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+
+ memset(obj, 0, sizeof(obj));
+ for (n = 0; n < XS; n++)
+ obj[n].handle = dep[n];
+ obj[XS].handle = result;
+ obj[XS+1].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[XS+1].handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = XS + 2;
+ execbuf.flags = ring;
+ for (n = 0; n < MAX_CONTEXTS; n++) {
+ execbuf.rsvd1 = ctx[n];
+ gem_execbuf(fd, &execbuf);
+ }
+ gem_close(fd, obj[XS+1].handle);
+ gem_sync(fd, result);
+ }
+
+ plug = igt_cork_plug(&cork, fd);
+
+ /* Create a deep dependency chain, with a few branches */
+ for (n = 0; n < nreq && igt_seconds_elapsed(&tv) < 2; n++) {
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 eb = {
+ .buffers_ptr = to_user_pointer(obj),
+ .buffer_count = 3,
+ .flags = ring | (gen < 6 ? I915_EXEC_SECURE : 0),
+ .rsvd1 = ctx[n % MAX_CONTEXTS],
+ };
+ uint32_t batch[16];
+ int i;
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = plug;
+
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.presumed_offset = 0;
+ reloc.offset = sizeof(uint32_t);
+ reloc.delta = sizeof(uint32_t) * n;
+ reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+ obj[2].handle = gem_create(fd, 4096);
+ obj[2].relocs_ptr = to_user_pointer(&reloc);
+ obj[2].relocation_count = 1;
+
+ i = 0;
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = reloc.delta;
+ batch[++i] = 0;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = reloc.delta;
+ reloc.offset += sizeof(uint32_t);
+ } else {
+ batch[i]--;
+ batch[++i] = reloc.delta;
+ }
+ batch[++i] = eb.rsvd1;
+ batch[++i] = MI_BATCH_BUFFER_END;
+ gem_write(fd, obj[2].handle, 0, batch, sizeof(batch));
+
+ gem_context_set_priority(fd, eb.rsvd1, MAX_PRIO - nreq + n);
+ for (int m = 0; m < XS; m++) {
+ obj[1].handle = dep[m];
+ reloc.target_handle = obj[1].handle;
+ gem_execbuf(fd, &eb);
+ }
+ gem_close(fd, obj[2].handle);
+ }
+ igt_info("First deptree: %d requests [%.3fs]\n",
+ n * XS, 1e-9*igt_nsec_elapsed(&tv));
+ dep_nreq = n;
+
+ for (n = 0; n < nreq && igt_seconds_elapsed(&tv) < 4; n++) {
+ uint32_t context = ctx[n % MAX_CONTEXTS];
+ gem_context_set_priority(fd, context, MAX_PRIO - nreq + n);
+
+ for (int m = 0; m < XS; m++) {
+ store_dword(fd, context, ring, result, 4*n, context, dep[m], 0);
+ store_dword(fd, context, ring, result, 4*m, context, 0, I915_GEM_DOMAIN_INSTRUCTION);
+ }
+ expected = context;
+ }
+ igt_info("Second deptree: %d requests [%.3fs]\n",
+ n * XS, 1e-9*igt_nsec_elapsed(&tv));
+
+ unplug_show_queue(fd, &cork, ring);
+ gem_close(fd, plug);
+ igt_require(expected); /* too slow */
+
+ for (n = 0; n < MAX_CONTEXTS; n++)
+ gem_context_destroy(fd, ctx[n]);
+
+ for (int m = 0; m < XS; m++) {
+ ptr = gem_mmap__gtt(fd, dep[m], size, PROT_READ);
+ gem_set_domain(fd, dep[m], /* no write hazard lies! */
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ gem_close(fd, dep[m]);
+
+ for (n = 0; n < dep_nreq; n++)
+ igt_assert_eq_u32(ptr[n], ctx[n % MAX_CONTEXTS]);
+ munmap(ptr, size);
+ }
+
+ ptr = gem_mmap__gtt(fd, result, size, PROT_READ);
+ gem_set_domain(fd, result, /* no write hazard lies! */
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ gem_close(fd, result);
+
+ /* No reordering due to PI on all contexts because of the common dep */
+ for (int m = 0; m < XS; m++)
+ igt_assert_eq_u32(ptr[m], expected);
+ munmap(ptr, size);
+
+ free(ctx);
+#undef XS
+}
+
+static void alarm_handler(int sig)
+{
+}
+
+static int __execbuf(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
+{
+ int err = 0;
+ if (ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf))
+ err = -errno;
+ return err;
+}
+
+static void wide(int fd, unsigned ring)
+{
+ struct timespec tv = {};
+ unsigned int ring_size = gem_measure_ring_inflight(fd, ring, MEASURE_RING_NEW_CTX);
+
+ IGT_CORK_HANDLE(cork);
+ uint32_t plug;
+ uint32_t result;
+ uint32_t *ptr;
+ uint32_t *ctx;
+ unsigned int count;
+
+ ctx = malloc(sizeof(*ctx)*MAX_CONTEXTS);
+ for (int n = 0; n < MAX_CONTEXTS; n++)
+ ctx[n] = gem_context_create(fd);
+
+ result = gem_create(fd, 4*MAX_CONTEXTS);
+
+ plug = igt_cork_plug(&cork, fd);
+
+ /* Lots of in-order requests, plugged and submitted simultaneously */
+ for (count = 0;
+ igt_seconds_elapsed(&tv) < 5 && count < ring_size;
+ count++) {
+ for (int n = 0; n < MAX_CONTEXTS; n++) {
+ store_dword(fd, ctx[n], ring, result, 4*n, ctx[n], plug, I915_GEM_DOMAIN_INSTRUCTION);
+ }
+ }
+ igt_info("Submitted %d requests over %d contexts in %.1fms\n",
+ count, MAX_CONTEXTS, igt_nsec_elapsed(&tv) * 1e-6);
+
+ unplug_show_queue(fd, &cork, ring);
+ gem_close(fd, plug);
+
+ for (int n = 0; n < MAX_CONTEXTS; n++)
+ gem_context_destroy(fd, ctx[n]);
+
+ ptr = gem_mmap__gtt(fd, result, 4*MAX_CONTEXTS, PROT_READ);
+ gem_set_domain(fd, result, /* no write hazard lies! */
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ for (int n = 0; n < MAX_CONTEXTS; n++)
+ igt_assert_eq_u32(ptr[n], ctx[n]);
+ munmap(ptr, 4*MAX_CONTEXTS);
+
+ gem_close(fd, result);
+ free(ctx);
+}
+
+static void reorder_wide(int fd, unsigned ring)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct timespec tv = {};
+ unsigned int ring_size = gem_measure_ring_inflight(fd, ring, MEASURE_RING_NEW_CTX);
+ IGT_CORK_HANDLE(cork);
+ uint32_t result, target, plug;
+ uint32_t *found, *expected;
+
+ result = gem_create(fd, 4096);
+ target = gem_create(fd, 4096);
+ plug = igt_cork_plug(&cork, fd);
+
+ expected = gem_mmap__cpu(fd, target, 0, 4096, PROT_WRITE);
+ gem_set_domain(fd, target, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = plug;
+ obj[1].handle = result;
+ obj[2].relocs_ptr = to_user_pointer(&reloc);
+ obj[2].relocation_count = 1;
+
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.target_handle = result;
+ reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.write_domain = 0; /* lies */
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 3;
+ execbuf.flags = ring;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
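+ /* For each priority level, pack ring_size stores into one batch bo,
+ * each writing its sequence number to a random slot of the result.
+ * With priority reordering the lowest-priority submission executes
+ * last, so the first value recorded per slot is the one expected to
+ * survive.
+ */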
+ for (int n = MIN_PRIO, x = 1;
+ igt_seconds_elapsed(&tv) < 5 && n <= MAX_PRIO;
+ n++, x++) {
+ unsigned int sz = ALIGN(ring_size * 64, 4096);
+ uint32_t *batch;
+
+ execbuf.rsvd1 = gem_context_create(fd);
+ gem_context_set_priority(fd, execbuf.rsvd1, n);
+
+ obj[2].handle = gem_create(fd, sz);
+ batch = gem_mmap__gtt(fd, obj[2].handle, sz, PROT_WRITE);
+ gem_set_domain(fd, obj[2].handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+ for (int m = 0; m < ring_size; m++) {
+ uint64_t addr;
+ int idx = hars_petruska_f54_1_random_unsafe_max(1024);
+ int i;
+
+ execbuf.batch_start_offset = m * 64;
+ reloc.offset = execbuf.batch_start_offset + sizeof(uint32_t);
+ reloc.delta = idx * sizeof(uint32_t);
+ addr = reloc.presumed_offset + reloc.delta;
+
+ i = execbuf.batch_start_offset / sizeof(uint32_t);
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = addr;
+ batch[++i] = addr >> 32;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = addr;
+ reloc.offset += sizeof(uint32_t);
+ } else {
+ batch[i]--;
+ batch[++i] = addr;
+ }
+ batch[++i] = x;
+ batch[++i] = MI_BATCH_BUFFER_END;
+
+ if (!expected[idx])
+ expected[idx] = x;
+
+ gem_execbuf(fd, &execbuf);
+ }
+
+ munmap(batch, sz);
+ gem_close(fd, obj[2].handle);
+ gem_context_destroy(fd, execbuf.rsvd1);
+ }
+
+ unplug_show_queue(fd, &cork, ring);
+ gem_close(fd, plug);
+
+ found = gem_mmap__gtt(fd, result, 4096, PROT_READ);
+ gem_set_domain(fd, result, /* no write hazard lies! */
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ for (int n = 0; n < 1024; n++)
+ igt_assert_eq_u32(found[n], expected[n]);
+ munmap(found, 4096);
+ munmap(expected, 4096);
+
+ gem_close(fd, result);
+ gem_close(fd, target);
+}
+
+static void bind_to_cpu(int cpu)
+{
+ const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ struct sched_param rt = {.sched_priority = 99 };
+ cpu_set_t allowed;
+
+ igt_assert(sched_setscheduler(getpid(), SCHED_RR | SCHED_RESET_ON_FORK, &rt) == 0);
+
+ CPU_ZERO(&allowed);
+ CPU_SET(cpu % ncpus, &allowed);
+ igt_assert(sched_setaffinity(getpid(), sizeof(cpu_set_t), &allowed) == 0);
+}
+
+static void test_pi_ringfull(int fd, unsigned int engine)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct sigaction sa = { .sa_handler = alarm_handler };
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[2];
+ unsigned int last, count;
+ struct itimerval itv;
+ IGT_CORK_HANDLE(c);
+ bool *result;
+
+ result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(result != MAP_FAILED);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ memset(&obj, 0, sizeof(obj));
+
+ obj[1].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+
+ execbuf.buffers_ptr = to_user_pointer(&obj[1]);
+ execbuf.buffer_count = 1;
+ execbuf.flags = engine;
+ execbuf.rsvd1 = gem_context_create(fd);
+ gem_context_set_priority(fd, execbuf.rsvd1, MIN_PRIO);
+
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, obj[1].handle);
+
+ /* Fill the low-priority ring */
+ obj[0].handle = igt_cork_plug(&c, fd);
+
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+
+ sigaction(SIGALRM, &sa, NULL);
+ itv.it_interval.tv_sec = 0;
+ itv.it_interval.tv_usec = 1000;
+ itv.it_value.tv_sec = 0;
+ itv.it_value.tv_usec = 10000;
+ setitimer(ITIMER_REAL, &itv, NULL);
+
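+ /* Submit until execbuf blocks waiting for ring space; the itimer
+ * keeps interrupting the stuck ioctl so the loop can notice when no
+ * forward progress is being made and stop.
+ */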
+ last = -1;
+ count = 0;
+ do {
+ if (__execbuf(fd, &execbuf) == 0) {
+ count++;
+ continue;
+ }
+
+ if (last == count)
+ break;
+
+ last = count;
+ } while (1);
+ igt_debug("Filled low-priority ring with %d batches\n", count);
+
+ memset(&itv, 0, sizeof(itv));
+ setitimer(ITIMER_REAL, &itv, NULL);
+
+ execbuf.buffers_ptr = to_user_pointer(&obj[1]);
+ execbuf.buffer_count = 1;
+
+ /* both parent + child on the same cpu, only parent is RT */
+ bind_to_cpu(0);
+
+ igt_fork(child, 1) {
+ result[0] = true;
+
+ igt_debug("Creating HP context\n");
+ execbuf.rsvd1 = gem_context_create(fd);
+ gem_context_set_priority(fd, execbuf.rsvd1, MAX_PRIO);
+
+ kill(getppid(), SIGALRM);
+ sched_yield();
+ result[1] = true;
+
+ itv.it_value.tv_sec = 0;
+ itv.it_value.tv_usec = 10000;
+ setitimer(ITIMER_REAL, &itv, NULL);
+
+ /* Since we are the high priority task, we expect to be
+ * able to add ourselves to *our* ring without interruption.
+ */
+ igt_debug("HP child executing\n");
+ result[2] = __execbuf(fd, &execbuf) == 0;
+ gem_context_destroy(fd, execbuf.rsvd1);
+ }
+
+ /* Relinquish CPU just to allow child to create a context */
+ sleep(1);
+ igt_assert_f(result[0], "HP context (child) not created\n");
+ igt_assert_f(!result[1], "Child released too early!\n");
+
+ /* Parent sleeps waiting for ringspace, releasing child */
+ itv.it_value.tv_sec = 0;
+ itv.it_value.tv_usec = 50000;
+ setitimer(ITIMER_REAL, &itv, NULL);
+ igt_debug("LP parent executing\n");
+ igt_assert_eq(__execbuf(fd, &execbuf), -EINTR);
+ igt_assert_f(result[1], "Child was not released!\n");
+ igt_assert_f(result[2],
+ "High priority child unable to submit within 10ms\n");
+
+ igt_cork_unplug(&c);
+ igt_waitchildren();
+
+ gem_context_destroy(fd, execbuf.rsvd1);
+ gem_close(fd, obj[1].handle);
+ gem_close(fd, obj[0].handle);
+ munmap(result, 4096);
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ int fd = -1;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver_master(DRIVER_INTEL);
+ gem_submission_print_method(fd);
+ gem_scheduler_print_capability(fd);
+
+ igt_require_gem(fd);
+ gem_require_mmap_wc(fd);
+ gem_require_contexts(fd);
+
+ igt_fork_hang_detector(fd);
+ }
+
+ igt_subtest_group {
+ for (e = intel_execution_engines; e->name; e++) {
+ /* default exec-id is purely symbolic */
+ if (e->exec_id == 0)
+ continue;
+
+ igt_subtest_f("fifo-%s", e->name) {
+ igt_require(gem_ring_has_physical_engine(fd, e->exec_id | e->flags));
+ igt_require(gem_can_store_dword(fd, e->exec_id | e->flags));
+ fifo(fd, e->exec_id | e->flags);
+ }
+
+ igt_subtest_f("independent-%s", e->name) {
+ igt_require(gem_ring_has_physical_engine(fd, e->exec_id | e->flags));
+ igt_require(gem_can_store_dword(fd, e->exec_id | e->flags));
+ independent(fd, e->exec_id | e->flags);
+ }
+ }
+ }
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(gem_scheduler_enabled(fd));
+ igt_require(gem_scheduler_has_ctx_priority(fd));
+ }
+
+ igt_subtest("smoketest-all")
+ smoketest(fd, ALL_ENGINES, 30);
+
+ for (e = intel_execution_engines; e->name; e++) {
+ if (e->exec_id == 0)
+ continue;
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(gem_ring_has_physical_engine(fd, e->exec_id | e->flags));
+ igt_require(gem_can_store_dword(fd, e->exec_id | e->flags));
+ }
+
+ igt_subtest_f("in-order-%s", e->name)
+ reorder(fd, e->exec_id | e->flags, EQUAL);
+
+ igt_subtest_f("out-order-%s", e->name)
+ reorder(fd, e->exec_id | e->flags, 0);
+
+ igt_subtest_f("promotion-%s", e->name)
+ promotion(fd, e->exec_id | e->flags);
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(gem_scheduler_has_preemption(fd));
+ }
+
+ igt_subtest_f("preempt-%s", e->name)
+ preempt(fd, e->exec_id | e->flags, 0);
+
+ igt_subtest_f("preempt-contexts-%s", e->name)
+ preempt(fd, e->exec_id | e->flags, NEW_CTX);
+
+ igt_subtest_f("preempt-self-%s", e->name)
+ preempt_self(fd, e->exec_id | e->flags);
+
+ igt_subtest_f("preempt-other-%s", e->name)
+ preempt_other(fd, e->exec_id | e->flags, 0);
+
+ igt_subtest_f("preempt-other-chain-%s", e->name)
+ preempt_other(fd, e->exec_id | e->flags, CHAIN);
+
+ igt_subtest_f("preempt-queue-%s", e->name)
+ preempt_queue(fd, e->exec_id | e->flags, 0);
+
+ igt_subtest_f("preempt-queue-chain-%s", e->name)
+ preempt_queue(fd, e->exec_id | e->flags, CHAIN);
+ igt_subtest_f("preempt-queue-contexts-%s", e->name)
+ preempt_queue(fd, e->exec_id | e->flags, CONTEXTS);
+
+ igt_subtest_f("preempt-queue-contexts-chain-%s", e->name)
+ preempt_queue(fd, e->exec_id | e->flags, CONTEXTS | CHAIN);
+
+ igt_subtest_group {
+ igt_hang_t hang;
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ hang = igt_allow_hang(fd, 0, 0);
+ }
+
+ igt_subtest_f("preempt-hang-%s", e->name) {
+ preempt(fd, e->exec_id | e->flags, NEW_CTX | HANG_LP);
+ }
+
+ igt_subtest_f("preemptive-hang-%s", e->name)
+ preemptive_hang(fd, e->exec_id | e->flags);
+
+ igt_fixture {
+ igt_disallow_hang(fd, hang);
+ igt_fork_hang_detector(fd);
+ }
+ }
+ }
+
+ igt_subtest_f("deep-%s", e->name)
+ deep(fd, e->exec_id | e->flags);
+
+ igt_subtest_f("wide-%s", e->name)
+ wide(fd, e->exec_id | e->flags);
+
+ igt_subtest_f("reorder-wide-%s", e->name)
+ reorder_wide(fd, e->exec_id | e->flags);
+
+ igt_subtest_f("smoketest-%s", e->name)
+ smoketest(fd, e->exec_id | e->flags, 5);
+ }
+ }
+ }
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(gem_scheduler_enabled(fd));
+ igt_require(gem_scheduler_has_ctx_priority(fd));
+
+ /* need separate rings */
+ igt_require(gem_has_execlists(fd));
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ if (e->exec_id == 0)
+ continue;
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(gem_ring_has_physical_engine(fd, e->exec_id | e->flags));
+ igt_require(gem_scheduler_has_preemption(fd));
+ }
+
+ igt_subtest_f("pi-ringfull-%s", e->name)
+ test_pi_ringfull(fd, e->exec_id | e->flags);
+ }
+ }
+ }
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_exec_store.c b/tests/i915/gem_exec_store.c
new file mode 100644
index 00000000..a7673489
--- /dev/null
+++ b/tests/i915/gem_exec_store.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/** @file gem_exec_store.c
+ *
+ * Simplest batch that does more than a no-op, with verification of the result.
+ */
+
+#include "igt.h"
+#include "igt_device.h"
+#include "igt_gt.h"
+#include <strings.h>
+
+#define LOCAL_I915_EXEC_BSD_SHIFT (13)
+#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+static void store_dword(int fd, unsigned ring)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t batch[16];
+ int i;
+
+ gem_require_ring(fd, ring);
+ igt_require(gem_can_store_dword(fd, ring));
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags = ring;
+ if (gen > 3 && gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = gem_create(fd, 4096);
+ obj[1].handle = gem_create(fd, 4096);
+
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.target_handle = obj[0].handle;
+ reloc.presumed_offset = 0;
+ reloc.offset = sizeof(uint32_t);
+ reloc.delta = 0;
+ reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+ obj[1].relocs_ptr = to_user_pointer(&reloc);
+ obj[1].relocation_count = 1;
+
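+ /*
+  * Build the MI_STORE_DWORD_IMM batch by hand: gen8+ takes a 64-bit
+  * address (two dwords), gen4-7 takes a padding dword followed by a
+  * 32-bit address (so the reloc offset is bumped by one dword), and
+  * gen2/3 drop the padding dword, hence the length field decrement.
+  * Pre-gen6 the command dword also gets bit 22 set.
+  */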
+ i = 0;
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = 0;
+ reloc.offset += sizeof(uint32_t);
+ } else {
+ batch[i]--;
+ batch[++i] = 0;
+ }
+ batch[++i] = 0xc0ffee;
+ batch[++i] = MI_BATCH_BUFFER_END;
+ gem_write(fd, obj[1].handle, 0, batch, sizeof(batch));
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, obj[1].handle);
+
+ gem_read(fd, obj[0].handle, 0, batch, sizeof(batch));
+ gem_close(fd, obj[0].handle);
+ igt_assert_eq(*batch, 0xc0ffee);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+#define PAGES 1
+static void store_cachelines(int fd, unsigned ring, unsigned int flags)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 *obj;
+ struct drm_i915_gem_relocation_entry *reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+#define NCACHELINES (4096/64)
+ uint32_t *batch;
+ int i;
+
+ reloc = calloc(NCACHELINES, sizeof(*reloc));
+ igt_assert(reloc);
+
+ gem_require_ring(fd, ring);
+ igt_require(gem_can_store_dword(fd, ring));
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffer_count = flags & PAGES ? NCACHELINES + 1 : 2;
+ execbuf.flags = ring;
+ if (gen > 3 && gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ obj = calloc(execbuf.buffer_count, sizeof(*obj));
+ igt_assert(obj);
+ for (i = 0; i < execbuf.buffer_count; i++)
+ obj[i].handle = gem_create(fd, 4096);
+ obj[i-1].relocs_ptr = to_user_pointer(reloc);
+ obj[i-1].relocation_count = NCACHELINES;
+ execbuf.buffers_ptr = to_user_pointer(obj);
+
+ batch = gem_mmap__cpu(fd, obj[i-1].handle, 0, 4096, PROT_WRITE);
+
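+ /*
+  * One store per cacheline: reloc[n] targets obj[n % (buffer_count-1)]
+  * at byte offset 4 * (n * 16 + n % 16), i.e. cacheline n with a
+  * varying dword inside it, and writes the marker n | ~n << 16 that is
+  * read back and checked below.
+  */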
+ i = 0;
+ for (unsigned n = 0; n < NCACHELINES; n++) {
+ reloc[n].target_handle = obj[n % (execbuf.buffer_count-1)].handle;
+ reloc[n].presumed_offset = -1;
+ reloc[n].offset = (i + 1)*sizeof(uint32_t);
+ reloc[n].delta = 4 * (n * 16 + n % 16);
+ reloc[n].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[n].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = 0;
+ reloc[n].offset += sizeof(uint32_t);
+ } else {
+ batch[i]--;
+ batch[++i] = 0;
+ }
+ batch[++i] = n | ~n << 16;
+ i++;
+ }
+ batch[i++] = MI_BATCH_BUFFER_END;
+ igt_assert(i < 4096 / sizeof(*batch));
+ munmap(batch, 4096);
+ gem_execbuf(fd, &execbuf);
+
+ for (unsigned n = 0; n < NCACHELINES; n++) {
+ uint32_t result;
+
+ gem_read(fd, reloc[n].target_handle, reloc[n].delta,
+ &result, sizeof(result));
+
+ igt_assert_eq_u32(result, n | ~n << 16);
+ }
+ free(reloc);
+
+ for (unsigned n = 0; n < execbuf.buffer_count; n++)
+ gem_close(fd, obj[n].handle);
+ free(obj);
+
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+static void store_all(int fd)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc[32];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ unsigned engines[16], permuted[16];
+ uint32_t batch[16];
+ uint64_t offset;
+ unsigned engine, nengine;
+ int value;
+ int i, j;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ memset(reloc, 0, sizeof(reloc));
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = gem_create(fd, 4096);
+ obj[1].handle = gem_create(fd, 4096);
+ obj[1].relocation_count = 1;
+
+ offset = sizeof(uint32_t);
+ i = 0;
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = 0;
+ offset += sizeof(uint32_t);
+ } else {
+ batch[i]--;
+ batch[++i] = 0;
+ }
+ batch[value = ++i] = 0xc0ffee;
+ batch[++i] = MI_BATCH_BUFFER_END;
+
+ nengine = 0;
+ intel_detect_and_clear_missed_interrupts(fd);
+ for_each_physical_engine(fd, engine) {
+ if (!gem_can_store_dword(fd, engine))
+ continue;
+
+ igt_assert(2*(nengine+1)*sizeof(batch) <= 4096);
+
+ execbuf.flags &= ~ENGINE_MASK;
+ execbuf.flags |= engine;
+
+ j = 2*nengine;
+ reloc[j].target_handle = obj[0].handle;
+ reloc[j].presumed_offset = ~0;
+ reloc[j].offset = j*sizeof(batch) + offset;
+ reloc[j].delta = nengine*sizeof(uint32_t);
+ reloc[j].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[j].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+ obj[1].relocs_ptr = to_user_pointer(&reloc[j]);
+
+ batch[value] = 0xdeadbeef;
+ gem_write(fd, obj[1].handle, j*sizeof(batch),
+ batch, sizeof(batch));
+ execbuf.batch_start_offset = j*sizeof(batch);
+ gem_execbuf(fd, &execbuf);
+
+ j = 2*nengine + 1;
+ reloc[j].target_handle = obj[0].handle;
+ reloc[j].presumed_offset = ~0;
+ reloc[j].offset = j*sizeof(batch) + offset;
+ reloc[j].delta = nengine*sizeof(uint32_t);
+ reloc[j].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[j].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+ obj[1].relocs_ptr = to_user_pointer(&reloc[j]);
+
+ batch[value] = nengine;
+ gem_write(fd, obj[1].handle, j*sizeof(batch),
+ batch, sizeof(batch));
+ execbuf.batch_start_offset = j*sizeof(batch);
+ gem_execbuf(fd, &execbuf);
+
+ engines[nengine++] = engine;
+ }
+ gem_sync(fd, obj[1].handle);
+
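+ /*
+  * Replay: for each engine, resubmit its 0xdeadbeef batch on every
+  * engine in a random order, then resubmit the batch that stores the
+  * engine index. Every batch declares a write to the scratch object,
+  * so the submissions are ordered and the index store, being last,
+  * should be the value read back for each slot.
+  */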
+ for (i = 0; i < nengine; i++) {
+ obj[1].relocs_ptr = to_user_pointer(&reloc[2*i]);
+ execbuf.batch_start_offset = 2*i*sizeof(batch);
+ memcpy(permuted, engines, nengine*sizeof(engines[0]));
+ igt_permute_array(permuted, nengine, igt_exchange_int);
+ for (j = 0; j < nengine; j++) {
+ execbuf.flags &= ~ENGINE_MASK;
+ execbuf.flags |= permuted[j];
+ gem_execbuf(fd, &execbuf);
+ }
+ obj[1].relocs_ptr = to_user_pointer(&reloc[2*i+1]);
+ execbuf.batch_start_offset = (2*i+1)*sizeof(batch);
+ execbuf.flags &= ~ENGINE_MASK;
+ execbuf.flags |= engines[i];
+ gem_execbuf(fd, &execbuf);
+ }
+ gem_close(fd, obj[1].handle);
+
+ gem_read(fd, obj[0].handle, 0, engines, sizeof(engines));
+ gem_close(fd, obj[0].handle);
+
+ for (i = 0; i < nengine; i++)
+ igt_assert_eq_u32(engines[i], i);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+static int print_welcome(int fd)
+{
+ uint16_t devid = intel_get_drm_devid(fd);
+ const struct intel_device_info *info = intel_get_device_info(devid);
+ int err;
+
+ igt_info("Running on %s (pci-id %04x, gen %d)\n",
+ info->codename, devid, ffs(info->gen));
+ igt_info("Can use MI_STORE_DWORD(virtual)? %s\n",
+ gem_can_store_dword(fd, 0) ? "yes" : "no");
+
+ err = 0;
+ if (drmIoctl(fd, DRM_IOCTL_I915_GEM_THROTTLE, 0))
+ err = -errno;
+ igt_info("GPU operation? %s [errno=%d]\n",
+ err == 0 ? "yes" : "no", err);
+
+ return ffs(info->gen);
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ int fd;
+
+ igt_fixture {
+ int gen;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ gen = print_welcome(fd);
+ if (gen > 3 && gen < 6) /* ctg and ilk need secure batches */
+ igt_device_set_master(fd);
+
+ igt_require_gem(fd);
+ igt_require(gem_can_store_dword(fd, 0));
+
+ igt_fork_hang_detector(fd);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_f("basic-%s", e->name)
+ store_dword(fd, e->exec_id | e->flags);
+
+ igt_subtest_f("cachelines-%s", e->name)
+ store_cachelines(fd, e->exec_id | e->flags, 0);
+
+ igt_subtest_f("pages-%s", e->name)
+ store_cachelines(fd, e->exec_id | e->flags, PAGES);
+ }
+
+ igt_subtest("basic-all")
+ store_all(fd);
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_exec_suspend.c b/tests/i915/gem_exec_suspend.c
new file mode 100644
index 00000000..43c52d10
--- /dev/null
+++ b/tests/i915/gem_exec_suspend.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/** @file gem_exec_suspend.c
+ *
+ * Exercise executing batches across suspend before checking the results.
+ */
+
+#include "igt.h"
+#include "igt_gt.h"
+#include "igt_dummyload.h"
+
+#define NOSLEEP 0
+#define SUSPEND_DEVICES 1
+#define SUSPEND 2
+#define HIBERNATE_DEVICES 3
+#define HIBERNATE 4
+#define mode(x) ((x) & 0xff)
+
+#define LOCAL_I915_EXEC_BSD_SHIFT (13)
+#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+#define UNCACHED (0<<8)
+#define CACHED (1<<8)
+#define HANG (2<<8)
+
+static void run_test(int fd, unsigned ring, unsigned flags);
+
+static void check_bo(int fd, uint32_t handle)
+{
+ uint32_t *map;
+ int i;
+
+ igt_debug("Verifying result\n");
+ map = gem_mmap__cpu(fd, handle, 0, 4096, PROT_READ);
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, 0);
+ for (i = 0; i < 1024; i++)
+ igt_assert_eq(map[i], i);
+ munmap(map, 4096);
+}
+
+static void test_all(int fd, unsigned flags)
+{
+ unsigned engine;
+
+ for_each_physical_engine(fd, engine)
+ if (gem_can_store_dword(fd, engine))
+ run_test(fd, engine, flags & ~0xff);
+}
+
+static bool has_semaphores(int fd)
+{
+ struct drm_i915_getparam gp;
+ int val = -1;
+
+ memset(&gp, 0, sizeof(gp));
+ gp.param = I915_PARAM_HAS_SEMAPHORES;
+ gp.value = &val;
+
+ drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+ errno = 0;
+
+ return val > 0;
+}
+
+static void run_test(int fd, unsigned engine, unsigned flags)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ unsigned engines[16];
+ unsigned nengine;
+ igt_spin_t *spin = NULL;
+
+ nengine = 0;
+ if (engine == ALL_ENGINES) {
+ /* If we don't have semaphores, then every ring switch
+ * will result in a CPU stall until the previous write
+ * has finished. This is likely to hide any issue with
+ * the GPU being active across the suspend (because the
+ * GPU is then unlikely to be active!)
+ */
+ if (has_semaphores(fd)) {
+ for_each_physical_engine(fd, engine) {
+ if (gem_can_store_dword(fd, engine))
+ engines[nengine++] = engine;
+ }
+ } else {
+ igt_require(gem_has_ring(fd, 0));
+ igt_require(gem_can_store_dword(fd, 0));
+ engines[nengine++] = 0;
+ }
+ } else {
+ igt_require(gem_has_ring(fd, engine));
+ igt_require(gem_can_store_dword(fd, engine));
+ engines[nengine++] = engine;
+ }
+ igt_require(nengine);
+
+ /* Before suspending, check normal operation */
+ if (mode(flags) != NOSLEEP)
+ test_all(fd, flags);
+
+ gem_quiescent_gpu(fd);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
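+ /* 1 << 11 is I915_EXEC_NO_RELOC: the presumed offsets below stay valid */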
+ execbuf.flags = 1 << 11;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = gem_create(fd, 4096);
+ gem_set_caching(fd, obj[0].handle, !!(flags & CACHED));
+ obj[0].flags |= EXEC_OBJECT_WRITE;
+ obj[1].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+ igt_require(__gem_execbuf(fd, &execbuf) == 0);
+ gem_close(fd, obj[1].handle);
+
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.target_handle = obj[0].handle;
+ reloc.presumed_offset = obj[0].offset;
+ reloc.offset = sizeof(uint32_t);
+ if (gen >= 4 && gen < 8)
+ reloc.offset += sizeof(uint32_t);
+ reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+
+ obj[1].relocs_ptr = to_user_pointer(&reloc);
+ obj[1].relocation_count = 1;
+
+ for (int i = 0; i < 1024; i++) {
+ uint64_t offset;
+ uint32_t buf[16];
+ int b;
+
+ obj[1].handle = gem_create(fd, 4096);
+
+ reloc.delta = i * sizeof(uint32_t);
+ offset = reloc.presumed_offset + reloc.delta;
+
+ b = 0;
+ buf[b] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ buf[++b] = offset;
+ buf[++b] = offset >> 32;
+ } else if (gen >= 4) {
+ buf[++b] = 0;
+ buf[++b] = offset;
+ } else {
+ buf[b] -= 1;
+ buf[++b] = offset;
+ }
+ buf[++b] = i;
+ buf[++b] = MI_BATCH_BUFFER_END;
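+ /* The store is placed at the tail of the page; execution starts at
+ * offset 0 and no-ops through the zeroed dwords (MI_NOOP) up to it. */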
+ gem_write(fd, obj[1].handle,
+ 4096-sizeof(buf), buf, sizeof(buf));
+ execbuf.flags &= ~ENGINE_MASK;
+ execbuf.flags |= engines[rand() % nengine];
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, obj[1].handle);
+ }
+
+ if (flags & HANG)
+ spin = igt_spin_batch_new(fd, .engine = engine);
+
+ switch (mode(flags)) {
+ case NOSLEEP:
+ break;
+
+ case SUSPEND_DEVICES:
+ igt_system_suspend_autoresume(SUSPEND_STATE_MEM,
+ SUSPEND_TEST_DEVICES);
+ break;
+
+ case SUSPEND:
+ igt_system_suspend_autoresume(SUSPEND_STATE_MEM,
+ SUSPEND_TEST_NONE);
+ break;
+
+ case HIBERNATE_DEVICES:
+ igt_system_suspend_autoresume(SUSPEND_STATE_DISK,
+ SUSPEND_TEST_DEVICES);
+ break;
+
+ case HIBERNATE:
+ igt_system_suspend_autoresume(SUSPEND_STATE_DISK,
+ SUSPEND_TEST_NONE);
+ break;
+ }
+
+ igt_spin_batch_free(fd, spin);
+
+ check_bo(fd, obj[0].handle);
+ gem_close(fd, obj[0].handle);
+
+ gem_quiescent_gpu(fd);
+
+ /* After resume, make sure it still works */
+ if (mode(flags) != NOSLEEP)
+ test_all(fd, flags);
+}
+
+igt_main
+{
+ const struct {
+ const char *suffix;
+ unsigned mode;
+ } modes[] = {
+ { "", NOSLEEP },
+ { "-S3", SUSPEND },
+ { "-S4", HIBERNATE },
+ { NULL, 0 }
+ }, *m;
+ const struct intel_execution_engine *e;
+ igt_hang_t hang;
+ int fd;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver_master(DRIVER_INTEL);
+ igt_require_gem(fd);
+ igt_require(gem_can_store_dword(fd, 0));
+
+ igt_fork_hang_detector(fd);
+ }
+
+ igt_subtest("basic")
+ run_test(fd, ALL_ENGINES, NOSLEEP);
+ igt_subtest("basic-S3-devices")
+ run_test(fd, ALL_ENGINES, SUSPEND_DEVICES);
+ igt_subtest("basic-S3")
+ run_test(fd, ALL_ENGINES, SUSPEND);
+ igt_subtest("basic-S4-devices")
+ run_test(fd, ALL_ENGINES, HIBERNATE_DEVICES);
+ igt_subtest("basic-S4")
+ run_test(fd, ALL_ENGINES, HIBERNATE);
+
+ for (e = intel_execution_engines; e->name; e++) {
+ for (m = modes; m->suffix; m++) {
+ igt_subtest_f("%s-uncached%s", e->name, m->suffix)
+ run_test(fd, e->exec_id | e->flags,
+ m->mode | UNCACHED);
+ igt_subtest_f("%s-cached%s", e->name, m->suffix)
+ run_test(fd, e->exec_id | e->flags,
+ m->mode | CACHED);
+ }
+ }
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ hang = igt_allow_hang(fd, 0, 0);
+ }
+
+ igt_subtest("hang-S3")
+ run_test(fd, 0, SUSPEND | HANG);
+ igt_subtest("hang-S4")
+ run_test(fd, 0, HIBERNATE | HANG);
+
+ igt_fixture {
+ igt_disallow_hang(fd, hang);
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
new file mode 100644
index 00000000..81303f84
--- /dev/null
+++ b/tests/i915/gem_exec_whisper.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/** @file gem_exec_whisper.c
+ *
+ * Pass around a value to write into a scratch buffer between lots of batches
+ */
+
+#include "igt.h"
+#include "igt_gt.h"
+#include "igt_debugfs.h"
+#include "igt_rand.h"
+#include "igt_sysfs.h"
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define LOCAL_I915_EXEC_BSD_SHIFT (13)
+#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+#define VERIFY 0
+
+static void write_seqno(int dir, unsigned offset)
+{
+ uint32_t seqno = UINT32_MAX - offset;
+
+ igt_sysfs_printf(dir, "i915_next_seqno", "0x%x", seqno);
+
+ igt_debug("next seqno set to: 0x%x\n", seqno);
+}
+
+static void check_bo(int fd, uint32_t handle)
+{
+ uint32_t *map;
+ int i;
+
+ igt_debug("Verifying result\n");
+ map = gem_mmap__cpu(fd, handle, 0, 4096, PROT_READ);
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, 0);
+ for (i = 0; i < 1024; i++)
+ igt_assert_eq(map[i], i);
+ munmap(map, 4096);
+}
+
+static void verify_reloc(int fd, uint32_t handle,
+ const struct drm_i915_gem_relocation_entry *reloc)
+{
+ if (VERIFY) {
+ uint64_t target = 0;
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ gem_read(fd, handle, reloc->offset, &target, 8);
+ else
+ gem_read(fd, handle, reloc->offset, &target, 4);
+ igt_assert_eq_u64(target,
+ reloc->presumed_offset + reloc->delta);
+ }
+}
+
+#define CONTEXTS 0x1
+#define FDS 0x2
+#define INTERRUPTIBLE 0x4
+#define CHAIN 0x8
+#define FORKED 0x10
+#define HANG 0x20
+#define SYNC 0x40
+#define PRIORITY 0x80
+
+struct hang {
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ int fd;
+};
+
+static void init_hang(struct hang *h)
+{
+ uint32_t *batch;
+ int i, gen;
+
+ h->fd = drm_open_driver(DRIVER_INTEL);
+ igt_allow_hang(h->fd, 0, 0);
+
+ gen = intel_gen(intel_get_drm_devid(h->fd));
+
+ memset(&h->execbuf, 0, sizeof(h->execbuf));
+ h->execbuf.buffers_ptr = to_user_pointer(&h->obj);
+ h->execbuf.buffer_count = 1;
+
+ memset(&h->obj, 0, sizeof(h->obj));
+ h->obj.handle = gem_create(h->fd, 4096);
+
+ h->obj.relocs_ptr = to_user_pointer(&h->reloc);
+ h->obj.relocation_count = 1;
+ memset(&h->reloc, 0, sizeof(h->reloc));
+
+ batch = gem_mmap__cpu(h->fd, h->obj.handle, 0, 4096, PROT_WRITE);
+ gem_set_domain(h->fd, h->obj.handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+
+ h->reloc.target_handle = h->obj.handle; /* recurse */
+ h->reloc.presumed_offset = 0;
+ h->reloc.offset = 5*sizeof(uint32_t);
+ h->reloc.delta = 0;
+ h->reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
+ h->reloc.write_domain = 0;
+
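+ /*
+  * A few marker dwords followed by an MI_BATCH_BUFFER_START that, via
+  * the self-relocation above, jumps back into this same buffer: a
+  * batch that never ends, relying on hangcheck/reset to break it.
+  */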
+ i = 0;
+ batch[i++] = 0xffffffff;
+ batch[i++] = 0xdeadbeef;
+ batch[i++] = 0xc00fee00;
+ batch[i++] = 0x00c00fee;
+ batch[i] = MI_BATCH_BUFFER_START;
+ if (gen >= 8) {
+ batch[i] |= 1 << 8 | 1;
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else if (gen >= 6) {
+ batch[i] |= 1 << 8;
+ batch[++i] = 0;
+ } else {
+ batch[i] |= 2 << 6;
+ batch[++i] = 0;
+ if (gen < 4) {
+ batch[i] |= 1;
+ h->reloc.delta = 1;
+ }
+ }
+ munmap(batch, 4096);
+}
+
+static void submit_hang(struct hang *h, unsigned *engines, int nengine, unsigned flags)
+{
+ while (nengine--) {
+ h->execbuf.flags &= ~ENGINE_MASK;
+ h->execbuf.flags |= *engines++;
+ gem_execbuf(h->fd, &h->execbuf);
+ }
+ if (flags & SYNC)
+ gem_sync(h->fd, h->obj.handle);
+}
+
+static void fini_hang(struct hang *h)
+{
+ close(h->fd);
+}
+
+static void ctx_set_random_priority(int fd, uint32_t ctx)
+{
+ int prio = hars_petruska_f54_1_random_unsafe_max(1024) - 512;
+ gem_context_set_priority(fd, ctx, prio);
+}
+
+static void whisper(int fd, unsigned engine, unsigned flags)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 batches[1024];
+ struct drm_i915_gem_relocation_entry inter[1024];
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_exec_object2 store, scratch;
+ struct drm_i915_gem_exec_object2 tmp[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct hang hang;
+ int fds[64];
+ uint32_t contexts[64];
+ unsigned engines[16];
+ unsigned nengine;
+ uint32_t batch[16];
+ int i, n, pass, loc;
+ unsigned int relocations = 0;
+ unsigned int reloc_migrations = 0;
+ unsigned int reloc_interruptions = 0;
+ unsigned int eb_migrations = 0;
+ uint64_t old_offset;
+ int debugfs;
+
+ if (flags & PRIORITY) {
+ igt_require(gem_scheduler_enabled(fd));
+ igt_require(gem_scheduler_has_ctx_priority(fd));
+ }
+
+ debugfs = igt_debugfs_dir(fd);
+
+ nengine = 0;
+ if (engine == ALL_ENGINES) {
+ for_each_physical_engine(fd, engine) {
+ if (gem_can_store_dword(fd, engine))
+ engines[nengine++] = engine;
+ }
+ } else {
+ igt_require(gem_has_ring(fd, engine));
+ igt_require(gem_can_store_dword(fd, engine));
+ engines[nengine++] = engine;
+ }
+ igt_require(nengine);
+
+ if (flags & FDS)
+ igt_require(gen >= 6);
+
+ if (flags & CONTEXTS)
+ gem_require_contexts(fd);
+
+ if (flags & HANG)
+ init_hang(&hang);
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ igt_fork(child, flags & FORKED ? sysconf(_SC_NPROCESSORS_ONLN) : 1) {
+ memset(&scratch, 0, sizeof(scratch));
+ scratch.handle = gem_create(fd, 4096);
+ scratch.flags = EXEC_OBJECT_WRITE;
+
+ memset(&store, 0, sizeof(store));
+ store.handle = gem_create(fd, 4096);
+ store.relocs_ptr = to_user_pointer(&reloc);
+ store.relocation_count = 1;
+
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.offset = sizeof(uint32_t);
+ if (gen < 8 && gen >= 4)
+ reloc.offset += sizeof(uint32_t);
+ loc = 8;
+ if (gen >= 4)
+ loc += 4;
+ reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+
+ {
+ tmp[0] = scratch;
+ tmp[1] = store;
+ gem_write(fd, store.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(tmp);
+ execbuf.buffer_count = 2;
+ execbuf.flags = LOCAL_I915_EXEC_HANDLE_LUT;
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+ igt_require(__gem_execbuf(fd, &execbuf) == 0);
+ scratch = tmp[0];
+ store = tmp[1];
+ }
+
+ i = 0;
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = store.offset + loc;
+ batch[++i] = (store.offset + loc) >> 32;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = store.offset + loc;
+ } else {
+ batch[i]--;
+ batch[++i] = store.offset + loc;
+ }
+ batch[++i] = 0xc0ffee;
+ igt_assert(loc == sizeof(uint32_t) * i);
+ batch[++i] = MI_BATCH_BUFFER_END;
+
+ if (flags & CONTEXTS) {
+ for (n = 0; n < 64; n++)
+ contexts[n] = gem_context_create(fd);
+ }
+ if (flags & FDS) {
+ for (n = 0; n < 64; n++)
+ fds[n] = drm_open_driver(DRIVER_INTEL);
+ }
+
+ memset(batches, 0, sizeof(batches));
+ for (n = 0; n < 1024; n++) {
+ batches[n].handle = gem_create(fd, 4096);
+ gem_write(fd, batches[n].handle, 0, &bbe, sizeof(bbe));
+ }
+ execbuf.buffers_ptr = to_user_pointer(batches);
+ execbuf.buffer_count = 1024;
+ gem_execbuf(fd, &execbuf);
+
+ execbuf.buffers_ptr = to_user_pointer(tmp);
+ execbuf.buffer_count = 2;
+
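+ /*
+  * Build the whisper chain: batches[0] stores into the immediate slot
+  * of the store batch, and batches[n] stores into the immediate slot
+  * of batches[n-1]. Executing the chain from the tail therefore passes
+  * a value down one batch at a time until the store batch finally
+  * writes it into the scratch buffer.
+  */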
+ old_offset = store.offset;
+ for (n = 0; n < 1024; n++) {
+ if (gen >= 8) {
+ batch[1] = old_offset + loc;
+ batch[2] = (old_offset + loc) >> 32;
+ } else if (gen >= 4) {
+ batch[2] = old_offset + loc;
+ } else {
+ batch[1] = old_offset + loc;
+ }
+
+ inter[n] = reloc;
+ inter[n].presumed_offset = old_offset;
+ inter[n].delta = loc;
+ batches[n].relocs_ptr = to_user_pointer(&inter[n]);
+ batches[n].relocation_count = 1;
+ gem_write(fd, batches[n].handle, 0, batch, sizeof(batch));
+
+ old_offset = batches[n].offset;
+ }
+
+ igt_while_interruptible(flags & INTERRUPTIBLE) {
+ for (pass = 0; pass < 1024; pass++) {
+ uint64_t offset;
+
+ if (!(flags & FORKED))
+ write_seqno(debugfs, pass);
+
+ if (flags & HANG)
+ submit_hang(&hang, engines, nengine, flags);
+
+ if (flags & CHAIN) {
+ execbuf.flags &= ~ENGINE_MASK;
+ execbuf.flags |= engines[rand() % nengine];
+ }
+
+ reloc.presumed_offset = scratch.offset;
+ reloc.delta = 4*pass;
+ offset = reloc.presumed_offset + reloc.delta;
+
+ i = 0;
+ if (gen >= 8) {
+ batch[++i] = offset;
+ batch[++i] = offset >> 32;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = offset;
+ } else {
+ batch[++i] = offset;
+ }
+ batch[++i] = ~pass;
+ gem_write(fd, store.handle, 0, batch, sizeof(batch));
+
+ tmp[0] = scratch;
+ igt_assert(tmp[0].flags & EXEC_OBJECT_WRITE);
+ tmp[1] = store;
+ verify_reloc(fd, store.handle, &reloc);
+ execbuf.buffers_ptr = to_user_pointer(tmp);
+ gem_execbuf(fd, &execbuf);
+ igt_assert_eq_u64(reloc.presumed_offset, tmp[0].offset);
+ if (flags & SYNC)
+ gem_sync(fd, tmp[0].handle);
+ scratch = tmp[0];
+
+ gem_write(fd, batches[1023].handle, loc, &pass, sizeof(pass));
+ for (n = 1024; --n >= 1; ) {
+ int this_fd = fd;
+ uint32_t handle[2];
+
+ execbuf.buffers_ptr = to_user_pointer(&batches[n-1]);
+ reloc_migrations += batches[n-1].offset != inter[n].presumed_offset;
+ batches[n-1].offset = inter[n].presumed_offset;
+ old_offset = inter[n].presumed_offset;
+ batches[n-1].relocation_count = 0;
+ batches[n-1].flags |= EXEC_OBJECT_WRITE;
+ verify_reloc(fd, batches[n].handle, &inter[n]);
+
+ if (flags & FDS) {
+ this_fd = fds[rand() % 64];
+ handle[0] = batches[n-1].handle;
+ handle[1] = batches[n].handle;
+ batches[n-1].handle =
+ gem_open(this_fd,
+ gem_flink(fd, handle[0]));
+ batches[n].handle =
+ gem_open(this_fd,
+ gem_flink(fd, handle[1]));
+ if (flags & PRIORITY)
+ ctx_set_random_priority(this_fd, 0);
+ }
+
+ if (!(flags & CHAIN)) {
+ execbuf.flags &= ~ENGINE_MASK;
+ execbuf.flags |= engines[rand() % nengine];
+ }
+ if (flags & CONTEXTS) {
+ execbuf.rsvd1 = contexts[rand() % 64];
+ if (flags & PRIORITY)
+ ctx_set_random_priority(this_fd, execbuf.rsvd1);
+ }
+
+ gem_execbuf(this_fd, &execbuf);
+ if (inter[n].presumed_offset == -1) {
+ reloc_interruptions++;
+ inter[n].presumed_offset = batches[n-1].offset;
+ }
+ igt_assert_eq_u64(inter[n].presumed_offset, batches[n-1].offset);
+
+ if (flags & SYNC)
+ gem_sync(this_fd, batches[n-1].handle);
+ relocations += inter[n].presumed_offset != old_offset;
+
+ batches[n-1].relocation_count = 1;
+ batches[n-1].flags &= ~EXEC_OBJECT_WRITE;
+
+ if (this_fd != fd) {
+ gem_close(this_fd, batches[n-1].handle);
+ batches[n-1].handle = handle[0];
+
+ gem_close(this_fd, batches[n].handle);
+ batches[n].handle = handle[1];
+ }
+ }
+ execbuf.flags &= ~ENGINE_MASK;
+ execbuf.rsvd1 = 0;
+ execbuf.buffers_ptr = to_user_pointer(&tmp);
+
+ tmp[0] = tmp[1];
+ tmp[0].relocation_count = 0;
+ tmp[0].flags = EXEC_OBJECT_WRITE;
+ reloc_migrations += tmp[0].offset != inter[0].presumed_offset;
+ tmp[0].offset = inter[0].presumed_offset;
+ old_offset = tmp[0].offset;
+ tmp[1] = batches[0];
+ verify_reloc(fd, batches[0].handle, &inter[0]);
+ gem_execbuf(fd, &execbuf);
+ if (inter[0].presumed_offset == -1) {
+ reloc_interruptions++;
+ inter[0].presumed_offset = tmp[0].offset;
+ }
+ igt_assert_eq_u64(inter[0].presumed_offset, tmp[0].offset);
+ relocations += inter[0].presumed_offset != old_offset;
+ batches[0] = tmp[1];
+
+ tmp[1] = tmp[0];
+ tmp[0] = scratch;
+ igt_assert(tmp[0].flags & EXEC_OBJECT_WRITE);
+ igt_assert_eq_u64(reloc.presumed_offset, tmp[0].offset);
+ igt_assert(tmp[1].relocs_ptr == to_user_pointer(&reloc));
+ tmp[1].relocation_count = 1;
+ tmp[1].flags &= ~EXEC_OBJECT_WRITE;
+ verify_reloc(fd, store.handle, &reloc);
+ gem_execbuf(fd, &execbuf);
+ eb_migrations += tmp[0].offset != scratch.offset;
+ eb_migrations += tmp[1].offset != store.offset;
+ igt_assert_eq_u64(reloc.presumed_offset, tmp[0].offset);
+ if (flags & SYNC)
+ gem_sync(fd, tmp[0].handle);
+
+ store = tmp[1];
+ scratch = tmp[0];
+ }
+ }
+ igt_info("Number of migrations for execbuf: %d\n", eb_migrations);
+ igt_info("Number of migrations for reloc: %d, interrupted %d, patched %d\n", reloc_migrations, reloc_interruptions, relocations);
+
+ check_bo(fd, scratch.handle);
+ gem_close(fd, scratch.handle);
+ gem_close(fd, store.handle);
+
+ if (flags & FDS) {
+ for (n = 0; n < 64; n++)
+ close(fds[n]);
+ }
+ if (flags & CONTEXTS) {
+ for (n = 0; n < 64; n++)
+ gem_context_destroy(fd, contexts[n]);
+ }
+ for (n = 0; n < 1024; n++)
+ gem_close(fd, batches[n].handle);
+ }
+
+ igt_waitchildren();
+
+ if (flags & HANG)
+ fini_hang(&hang);
+ else
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+
+ close(debugfs);
+}
+
+igt_main
+{
+ const struct mode {
+ const char *name;
+ unsigned flags;
+ } modes[] = {
+ { "normal", 0 },
+ { "interruptible", INTERRUPTIBLE },
+ { "forked", FORKED },
+ { "sync", SYNC },
+ { "chain", CHAIN },
+ { "chain-forked", CHAIN | FORKED },
+ { "chain-interruptible", CHAIN | INTERRUPTIBLE },
+ { "chain-sync", CHAIN | SYNC },
+ { "contexts", CONTEXTS },
+ { "contexts-interruptible", CONTEXTS | INTERRUPTIBLE},
+ { "contexts-forked", CONTEXTS | FORKED},
+ { "contexts-priority", CONTEXTS | FORKED | PRIORITY },
+ { "contexts-chain", CONTEXTS | CHAIN },
+ { "contexts-sync", CONTEXTS | SYNC },
+ { "fds", FDS },
+ { "fds-interruptible", FDS | INTERRUPTIBLE},
+ { "fds-forked", FDS | FORKED},
+ { "fds-priority", FDS | FORKED | PRIORITY },
+ { "fds-chain", FDS | CHAIN},
+ { "fds-sync", FDS | SYNC},
+ { NULL }
+ };
+ int fd;
+
+ igt_fixture {
+ fd = drm_open_driver_master(DRIVER_INTEL);
+ igt_require_gem(fd);
+ igt_require(gem_can_store_dword(fd, 0));
+ gem_submission_print_method(fd);
+
+ igt_fork_hang_detector(fd);
+ }
+
+ for (const struct mode *m = modes; m->name; m++)
+ igt_subtest_f("%s", m->name)
+ whisper(fd, ALL_ENGINES, m->flags);
+
+ for (const struct intel_execution_engine *e = intel_execution_engines;
+ e->name; e++) {
+ for (const struct mode *m = modes; m->name; m++) {
+ if (m->flags & CHAIN)
+ continue;
+
+ igt_subtest_f("%s-%s", e->name, m->name)
+ whisper(fd, e->exec_id | e->flags, m->flags);
+ }
+ }
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ }
+
+ igt_subtest_group {
+ for (const struct mode *m = modes; m->name; m++) {
+ if (m->flags & INTERRUPTIBLE)
+ continue;
+ igt_subtest_f("hang-%s", m->name)
+ whisper(fd, ALL_ENGINES, m->flags | HANG);
+ }
+ }
+
+ igt_fixture {
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_fd_exhaustion.c b/tests/i915/gem_fd_exhaustion.c
new file mode 100644
index 00000000..559590b1
--- /dev/null
+++ b/tests/i915/gem_fd_exhaustion.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include "igt.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <fcntl.h>
+#include <limits.h>
+
+unsigned int original_nr_open;
+
+static int read_sysctl(const char *path)
+{
+ unsigned int val;
+ FILE *f = fopen(path, "r");
+
+ if (f) {
+ igt_assert(fscanf(f, "%u", &val) == 1);
+ fclose(f);
+ return val;
+ }
+ return -errno;
+}
+
+static int write_sysctl(const char *path, unsigned int val)
+{
+ FILE *f = fopen(path, "w");
+
+ if (f) {
+ igt_assert(fprintf(f, "%u", val));
+ fclose(f);
+ return 0;
+ }
+ return -errno;
+}
+
+static bool allow_unlimited_files(void)
+{
+ unsigned int nofile_rlim = 1024*1024;
+ struct rlimit rlim;
+ int buf;
+
+ buf = read_sysctl("/proc/sys/fs/file-max");
+ if (buf > 0)
+ nofile_rlim = buf;
+ original_nr_open = read_sysctl("/proc/sys/fs/nr_open");
+ igt_assert(write_sysctl("/proc/sys/fs/nr_open", nofile_rlim) == 0);
+
+ if (getrlimit(RLIMIT_NOFILE, &rlim))
+ return false;
+
+ rlim.rlim_cur = nofile_rlim;
+ rlim.rlim_max = nofile_rlim;
+ return setrlimit(RLIMIT_NOFILE, &rlim) == 0;
+}
+
+static void restore_original_sysctl(int sig)
+{
+ if (original_nr_open > 0)
+ write_sysctl("/proc/sys/fs/nr_open", original_nr_open);
+}
+
+igt_simple_main
+{
+ int fd;
+
+ igt_require(allow_unlimited_files());
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ igt_install_exit_handler(restore_original_sysctl);
+
+ igt_fork(n, 1) {
+ igt_drop_root();
+
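+ /*
+  * Burn fds on /dev/null while checking that object creation still
+  * works; with RLIMIT_NOFILE raised to the system file-max, running
+  * out of fds here should also exhaust the global file table, at
+  * which point creating a GEM object (which needs a backing shmem
+  * file) is expected to fail as well.
+  */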
+ for (int i = 0; ; i++) {
+ int leak = open("/dev/null", O_RDONLY);
+ uint32_t handle;
+
+ if (__gem_create(fd, 4096, &handle) == 0)
+ gem_close(fd, handle);
+
+ if (leak < 0) {
+ igt_info("fd exhaustion after %i rounds.\n", i);
+ igt_assert(__gem_create(fd, 4096,
+ &handle) < 0);
+ break;
+ }
+ }
+
+ /* The child will free all the fds when exiting, so there is no need
+ * to clean up the mess here for the parent to be able to run its
+ * exit handlers. */
+ }
+
+ igt_waitchildren();
+
+ close(fd);
+}
diff --git a/tests/i915/gem_fence_thrash.c b/tests/i915/gem_fence_thrash.c
new file mode 100644
index 00000000..2d7fb2ff
--- /dev/null
+++ b/tests/i915/gem_fence_thrash.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright © 2008-9 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "config.h"
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <pthread.h>
+#include "drm.h"
+
+#include "igt.h"
+#include "igt_x86.h"
+
+#define PAGE_SIZE 4096
+#define CACHELINE 64
+
+#define OBJECT_SIZE (128*1024) /* restricted to 1MiB alignment on i915 fences */
+
+/* Before introduction of the LRU list for fences, allocation of a fence for a page
+ * fault would use the first inactive fence (i.e. in preference one with no outstanding
+ * GPU activity, or it would wait on the first to finish). Given the choice, it would simply
+ * reuse the fence that had just been allocated for the previous page-fault - the worst choice
+ * when copying between two buffers and thus constantly swapping fences.
+ */
+
+struct test {
+ int fd;
+ int tiling;
+ int num_surfaces;
+};
+
+static void *
+bo_create (int fd, int tiling)
+{
+ uint32_t handle;
+ void *ptr;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+
+ /* dirty cpu caches a bit ... */
+ ptr = gem_mmap__cpu(fd, handle, 0, OBJECT_SIZE,
+ PROT_READ | PROT_WRITE);
+ memset(ptr, 0, OBJECT_SIZE);
+ munmap(ptr, OBJECT_SIZE);
+
+ gem_set_tiling(fd, handle, tiling, 1024);
+
+ ptr = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
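+ /* Closing the handle is fine here; the GTT mmap keeps the object alive. */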
+ gem_close(fd, handle);
+
+ return ptr;
+}
+
+static void *
+bo_copy (void *_arg)
+{
+ struct test *t = (struct test *)_arg;
+ int fd = t->fd;
+ int n;
+ char *a, *b;
+
+ a = bo_create (fd, t->tiling);
+ b = bo_create (fd, t->tiling);
+
+ for (n = 0; n < 1000; n++) {
+ memcpy (a, b, OBJECT_SIZE);
+ sched_yield ();
+ }
+
+ munmap(a, OBJECT_SIZE);
+ munmap(b, OBJECT_SIZE);
+
+ return NULL;
+}
+
+static void copy_wc_page(void *dst, const void *src)
+{
+ igt_memcpy_from_wc(dst, src, PAGE_SIZE);
+}
+
+static void copy_wc_cacheline(void *dst, const void *src)
+{
+ igt_memcpy_from_wc(dst, src, CACHELINE);
+}
+
+static void
+_bo_write_verify(struct test *t)
+{
+ int fd = t->fd;
+ int i, k;
+ uint32_t **s;
+ unsigned int dwords = OBJECT_SIZE >> 2;
+ const char *tile_str[] = { "none", "x", "y" };
+ uint32_t tmp[PAGE_SIZE/sizeof(uint32_t)];
+
+ igt_assert(t->tiling >= 0 && t->tiling <= I915_TILING_Y);
+ igt_assert_lt(0, t->num_surfaces);
+
+ s = calloc(sizeof(*s), t->num_surfaces);
+ igt_assert(s);
+
+ for (k = 0; k < t->num_surfaces; k++)
+ s[k] = bo_create(fd, t->tiling);
+
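+ /*
+  * Write patterns through the GTT mapping of each surface (fenced when
+  * tiled) and read them back with igt_memcpy_from_wc(), checking that
+  * both single cachelines and whole pages survive intact.
+  */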
+ for (k = 0; k < t->num_surfaces; k++) {
+ uint32_t *a = s[k];
+
+ a[0] = 0xdeadbeef;
+ igt_assert_f(a[0] == 0xdeadbeef,
+ "tiling %s: write failed at start (%x)\n",
+ tile_str[t->tiling], a[0]);
+
+ a[dwords - 1] = 0xc0ffee;
+ igt_assert_f(a[dwords - 1] == 0xc0ffee,
+ "tiling %s: write failed at end (%x)\n",
+ tile_str[t->tiling], a[dwords - 1]);
+
+ for (i = 0; i < dwords; i += CACHELINE/sizeof(uint32_t)) {
+ for (int j = 0; j < CACHELINE/sizeof(uint32_t); j++)
+ a[i + j] = ~(i + j);
+
+ copy_wc_cacheline(tmp, a + i);
+ for (int j = 0; j < CACHELINE/sizeof(uint32_t); j++)
+ igt_assert_f(tmp[j] == ~(i+ j),
+ "tiling %s: write failed at %d (%x)\n",
+ tile_str[t->tiling], i + j, tmp[j]);
+
+ for (int j = 0; j < CACHELINE/sizeof(uint32_t); j++)
+ a[i + j] = i + j;
+ }
+
+ for (i = 0; i < dwords; i += PAGE_SIZE/sizeof(uint32_t)) {
+ copy_wc_page(tmp, a + i);
+ for (int j = 0; j < PAGE_SIZE/sizeof(uint32_t); j++) {
+ igt_assert_f(tmp[j] == i + j,
+ "tiling %s: verify failed at %d (%x)\n",
+ tile_str[t->tiling], i + j, tmp[j]);
+ }
+ }
+ }
+
+ for (k = 0; k < t->num_surfaces; k++)
+ munmap(s[k], OBJECT_SIZE);
+
+ free(s);
+}
+
+static void *
+bo_write_verify(void *_arg)
+{
+ struct test *t = (struct test *)_arg;
+ int i;
+
+ for (i = 0; i < 10; i++)
+ _bo_write_verify(t);
+
+ return 0;
+}
+
+static int run_test(int threads_per_fence, void *f, int tiling,
+ int surfaces_per_thread)
+{
+ struct test t;
+ pthread_t *threads;
+ int n, num_fences, num_threads;
+
+ t.fd = drm_open_driver(DRIVER_INTEL);
+ t.tiling = tiling;
+ t.num_surfaces = surfaces_per_thread;
+
+ num_fences = gem_available_fences(t.fd);
+ igt_assert_lt(0, num_fences);
+
+ num_threads = threads_per_fence * num_fences;
+
+ igt_info("%s: threads %d, fences %d, tiling %d, surfaces per thread %d\n",
+ f == bo_copy ? "copy" : "write-verify", num_threads,
+ num_fences, tiling, surfaces_per_thread);
+
+ if (threads_per_fence) {
+ threads = calloc(sizeof(*threads), num_threads);
+ igt_assert(threads != NULL);
+
+ for (n = 0; n < num_threads; n++)
+ pthread_create (&threads[n], NULL, f, &t);
+
+ for (n = 0; n < num_threads; n++)
+ pthread_join (threads[n], NULL);
+
+ free(threads);
+ } else {
+ void *(*func)(void *) = f;
+ igt_assert(func(&t) == (void *)0);
+ }
+
+ close(t.fd);
+
+ return 0;
+}
+
+igt_main
+{
+ igt_skip_on_simulation();
+
+ igt_subtest("bo-write-verify-none")
+ igt_assert(run_test(0, bo_write_verify, I915_TILING_NONE, 80) == 0);
+
+ igt_subtest("bo-write-verify-x")
+ igt_assert(run_test(0, bo_write_verify, I915_TILING_X, 80) == 0);
+
+ igt_subtest("bo-write-verify-y")
+ igt_assert(run_test(0, bo_write_verify, I915_TILING_Y, 80) == 0);
+
+ igt_subtest("bo-write-verify-threaded-none")
+ igt_assert(run_test(5, bo_write_verify, I915_TILING_NONE, 2) == 0);
+
+ igt_subtest("bo-write-verify-threaded-x") {
+ igt_assert(run_test(2, bo_write_verify, I915_TILING_X, 2) == 0);
+ igt_assert(run_test(5, bo_write_verify, I915_TILING_X, 2) == 0);
+ igt_assert(run_test(10, bo_write_verify, I915_TILING_X, 2) == 0);
+ igt_assert(run_test(20, bo_write_verify, I915_TILING_X, 2) == 0);
+ }
+
+ igt_subtest("bo-write-verify-threaded-y") {
+ igt_assert(run_test(2, bo_write_verify, I915_TILING_Y, 2) == 0);
+ igt_assert(run_test(5, bo_write_verify, I915_TILING_Y, 2) == 0);
+ igt_assert(run_test(10, bo_write_verify, I915_TILING_Y, 2) == 0);
+ igt_assert(run_test(20, bo_write_verify, I915_TILING_Y, 2) == 0);
+ }
+
+ igt_subtest("bo-copy")
+ igt_assert(run_test(1, bo_copy, I915_TILING_X, 1) == 0);
+}
diff --git a/tests/i915/gem_fence_upload.c b/tests/i915/gem_fence_upload.c
new file mode 100644
index 00000000..f3b0e62f
--- /dev/null
+++ b/tests/i915/gem_fence_upload.c
@@ -0,0 +1,405 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "config.h"
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <pthread.h>
+#include "drm.h"
+#include "i915_drm.h"
+
+#define OBJECT_SIZE (1024*1024) /* restricted to 1MiB alignment on i915 fences */
+
+static double elapsed(const struct timeval *start,
+ const struct timeval *end)
+{
+ return (end->tv_sec - start->tv_sec) + 1e-6*(end->tv_usec - start->tv_usec);
+}
+
+static void performance(void)
+{
+ int n, loop, count;
+ int fd, num_fences;
+ double linear[2], tiled[2];
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ num_fences = gem_available_fences(fd);
+ igt_require(num_fences > 0);
+
+ for (count = 2; count < 4*num_fences; count *= 2) {
+ struct timeval start, end;
+ uint32_t handle[count];
+ void *ptr[count];
+
+ for (n = 0; n < count; n++) {
+ handle[n] = gem_create(fd, OBJECT_SIZE);
+ ptr[n] = gem_mmap__gtt(fd, handle[n], OBJECT_SIZE, PROT_READ | PROT_WRITE);
+ }
+
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1024; loop++) {
+ for (n = 0; n < count; n++)
+ memset(ptr[n], 0, OBJECT_SIZE);
+ }
+ gettimeofday(&end, NULL);
+
+ linear[count != 2] = count * loop / elapsed(&start, &end);
+ igt_info("Upload rate for %d linear surfaces: %7.3fMiB/s\n", count, linear[count != 2]);
+
+ for (n = 0; n < count; n++)
+ gem_set_tiling(fd, handle[n], I915_TILING_X, 1024);
+
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1024; loop++) {
+ for (n = 0; n < count; n++)
+ memset(ptr[n], 0, OBJECT_SIZE);
+ }
+ gettimeofday(&end, NULL);
+
+ tiled[count != 2] = count * loop / elapsed(&start, &end);
+ igt_info("Upload rate for %d tiled surfaces: %7.3fMiB/s\n", count, tiled[count != 2]);
+
+ for (n = 0; n < count; n++) {
+ munmap(ptr[n], OBJECT_SIZE);
+ gem_close(fd, handle[n]);
+ }
+
+ }
+
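+ /*
+  * linear[0]/tiled[0] hold the 2-surface baseline, linear[1]/tiled[1]
+  * the largest surface count measured; thrashing many fences must not
+  * drop the upload rate below 75% of the baseline.
+  */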
+ errno = 0;
+ igt_assert(linear[1] > 0.75 * linear[0]);
+ igt_assert(tiled[1] > 0.75 * tiled[0]);
+}
+
+struct thread_performance {
+ pthread_t thread;
+ int id, count, direction, loops;
+ void **ptr;
+};
+
+static void *read_thread_performance(void *closure)
+{
+ struct thread_performance *t = closure;
+ uint32_t x = 0;
+ int n, m;
+
+ for (n = 0; n < t->loops; n++) {
+ uint32_t *src = t->ptr[rand() % t->count];
+ src += (rand() % 256) * 4096 / 4;
+ for (m = 0; m < 4096/4; m++)
+ x += src[m];
+ }
+
+ return (void *)(uintptr_t)x;
+}
+
+static void *write_thread_performance(void *closure)
+{
+ struct thread_performance *t = closure;
+ int n;
+
+ for (n = 0; n < t->loops; n++) {
+ uint32_t *dst = t->ptr[rand() % t->count];
+ dst += (rand() % 256) * 4096 / 4;
+ memset(dst, 0, 4096);
+ }
+
+ return NULL;
+}
+
+#define READ (1<<0)
+#define WRITE (1<<1)
+static const char *direction_string(unsigned mask)
+{
+ switch (mask) {
+ case READ: return "Download";
+ case WRITE: return "Upload";
+ case READ | WRITE: return "Combined";
+ default: return "Unknown";
+ }
+}
+static void thread_performance(unsigned mask)
+{
+ const int loops = 4096;
+ int n, count;
+ int fd, num_fences;
+ double linear[2], tiled[2];
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ num_fences = gem_available_fences(fd);
+ igt_require(num_fences > 0);
+
+ for (count = 2; count < 4*num_fences; count *= 2) {
+ const int nthreads = (mask & READ ? count : 0) + (mask & WRITE ? count : 0);
+ struct timeval start, end;
+ struct thread_performance readers[count];
+ struct thread_performance writers[count];
+ uint32_t handle[count];
+ void *ptr[count];
+
+ for (n = 0; n < count; n++) {
+ handle[n] = gem_create(fd, OBJECT_SIZE);
+ ptr[n] = gem_mmap__gtt(fd, handle[n], OBJECT_SIZE, PROT_READ | PROT_WRITE);
+
+ if (mask & READ) {
+ readers[n].id = n;
+ readers[n].direction = READ;
+ readers[n].ptr = ptr;
+ readers[n].count = count;
+ readers[n].loops = loops;
+ }
+
+ if (mask & WRITE) {
+ writers[n].id = count - n - 1;
+ writers[n].direction = WRITE;
+ writers[n].ptr = ptr;
+ writers[n].count = count;
+ writers[n].loops = loops;
+ }
+ }
+
+ gettimeofday(&start, NULL);
+ for (n = 0; n < count; n++) {
+ if (mask & READ)
+ pthread_create(&readers[n].thread, NULL, read_thread_performance, &readers[n]);
+ if (mask & WRITE)
+ pthread_create(&writers[n].thread, NULL, write_thread_performance, &writers[n]);
+ }
+ for (n = 0; n < count; n++) {
+ if (mask & READ)
+ pthread_join(readers[n].thread, NULL);
+ if (mask & WRITE)
+ pthread_join(writers[n].thread, NULL);
+ }
+ gettimeofday(&end, NULL);
+
+ linear[count != 2] = nthreads * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
+ igt_info("%s rate for %d linear surfaces, %d threads: %7.3fMiB/s\n", direction_string(mask), count, nthreads, linear[count != 2]);
+
+ for (n = 0; n < count; n++)
+ gem_set_tiling(fd, handle[n], I915_TILING_X, 1024);
+
+ gettimeofday(&start, NULL);
+ for (n = 0; n < count; n++) {
+ if (mask & READ)
+ pthread_create(&readers[n].thread, NULL, read_thread_performance, &readers[n]);
+ if (mask & WRITE)
+ pthread_create(&writers[n].thread, NULL, write_thread_performance, &writers[n]);
+ }
+ for (n = 0; n < count; n++) {
+ if (mask & READ)
+ pthread_join(readers[n].thread, NULL);
+ if (mask & WRITE)
+ pthread_join(writers[n].thread, NULL);
+ }
+ gettimeofday(&end, NULL);
+
+ tiled[count != 2] = nthreads * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
+ igt_info("%s rate for %d tiled surfaces, %d threads: %7.3fMiB/s\n", direction_string(mask), count, nthreads, tiled[count != 2]);
+
+ for (n = 0; n < count; n++) {
+ munmap(ptr[n], OBJECT_SIZE);
+ gem_close(fd, handle[n]);
+ }
+ }
+
+ errno = 0;
+ igt_assert(linear[1] > 0.75 * linear[0]);
+ igt_assert(tiled[1] > 0.75 * tiled[0]);
+}
+
+struct thread_contention {
+ pthread_t thread;
+ uint32_t handle;
+ int loops, fd;
+};
+static void *no_contention(void *closure)
+{
+ struct thread_contention *t = closure;
+ int n;
+
+ for (n = 0; n < t->loops; n++) {
+ uint32_t *ptr = gem_mmap__gtt(t->fd, t->handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+ memset(ptr + (rand() % 256) * 4096 / 4, 0, 4096);
+ munmap(ptr, OBJECT_SIZE);
+ }
+
+ return NULL;
+}
+
+static void *wc_mmap(void *closure)
+{
+ struct thread_contention *t = closure;
+ int n;
+
+ for (n = 0; n < t->loops; n++) {
+ uint32_t *ptr = gem_mmap__wc(t->fd, t->handle, 0, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+ memset(ptr + (rand() % 256) * 4096 / 4, 0, 4096);
+ munmap(ptr, OBJECT_SIZE);
+ }
+
+ return NULL;
+}
+
+static void thread_contention(void)
+{
+ const int loops = 4096;
+ int n, count;
+ int fd, num_fences;
+ double linear[2], tiled[2];
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ num_fences = gem_available_fences(fd);
+ igt_require(num_fences > 0);
+
+ for (count = 1; count < 4*num_fences; count *= 2) {
+ struct timeval start, end;
+ struct thread_contention threads[count];
+
+ for (n = 0; n < count; n++) {
+ threads[n].handle = gem_create(fd, OBJECT_SIZE);
+ threads[n].loops = loops;
+ threads[n].fd = fd;
+ }
+
+ gettimeofday(&start, NULL);
+ for (n = 0; n < count; n++)
+ pthread_create(&threads[n].thread, NULL, no_contention, &threads[n]);
+ for (n = 0; n < count; n++)
+ pthread_join(threads[n].thread, NULL);
+ gettimeofday(&end, NULL);
+
+ linear[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
+ igt_info("Contended upload rate for %d linear threads: %7.3fMiB/s\n", count, linear[count != 2]);
+
+ for (n = 0; n < count; n++)
+ gem_set_tiling(fd, threads[n].handle, I915_TILING_X, 1024);
+
+ gettimeofday(&start, NULL);
+ for (n = 0; n < count; n++)
+ pthread_create(&threads[n].thread, NULL, no_contention, &threads[n]);
+ for (n = 0; n < count; n++)
+ pthread_join(threads[n].thread, NULL);
+ gettimeofday(&end, NULL);
+
+ tiled[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
+ igt_info("Contended upload rate for %d tiled threads: %7.3fMiB/s\n", count, tiled[count != 2]);
+
+ for (n = 0; n < count; n++) {
+ gem_close(fd, threads[n].handle);
+ }
+ }
+
+ errno = 0;
+ igt_assert(linear[1] > 0.75 * linear[0]);
+ igt_assert(tiled[1] > 0.75 * tiled[0]);
+}
+
+static void wc_contention(void)
+{
+ const int loops = 4096;
+ int n, count;
+ int fd, num_fences;
+ double linear[2], tiled[2];
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ gem_require_mmap_wc(fd);
+
+ num_fences = gem_available_fences(fd);
+ igt_require(num_fences > 0);
+
+ for (count = 1; count < 4*num_fences; count *= 2) {
+ struct timeval start, end;
+ struct thread_contention threads[count];
+
+ for (n = 0; n < count; n++) {
+ threads[n].handle = gem_create(fd, OBJECT_SIZE);
+ threads[n].loops = loops;
+ threads[n].fd = fd;
+ }
+
+ gettimeofday(&start, NULL);
+ for (n = 0; n < count; n++)
+ pthread_create(&threads[n].thread, NULL, wc_mmap, &threads[n]);
+ for (n = 0; n < count; n++)
+ pthread_join(threads[n].thread, NULL);
+ gettimeofday(&end, NULL);
+
+ linear[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
+ igt_info("Contended upload rate for %d linear threads/wc: %7.3fMiB/s\n", count, linear[count != 2]);
+
+ for (n = 0; n < count; n++)
+ gem_set_tiling(fd, threads[n].handle, I915_TILING_X, 1024);
+
+ gettimeofday(&start, NULL);
+ for (n = 0; n < count; n++)
+ pthread_create(&threads[n].thread, NULL, wc_mmap, &threads[n]);
+ for (n = 0; n < count; n++)
+ pthread_join(threads[n].thread, NULL);
+ gettimeofday(&end, NULL);
+
+ tiled[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
+ igt_info("Contended upload rate for %d tiled threads/wc: %7.3fMiB/s\n", count, tiled[count != 2]);
+
+ for (n = 0; n < count; n++) {
+ gem_close(fd, threads[n].handle);
+ }
+ }
+
+ errno = 0;
+ igt_assert(linear[1] > 0.75 * linear[0]);
+ igt_assert(tiled[1] > 0.75 * tiled[0]);
+}
+
+igt_main
+{
+ igt_skip_on_simulation();
+
+ igt_subtest("performance")
+ performance();
+ igt_subtest("thread-contention")
+ thread_contention();
+ igt_subtest("wc-contention")
+ wc_contention();
+ igt_subtest("thread-performance-read")
+ thread_performance(READ);
+ igt_subtest("thread-performance-write")
+ thread_performance(WRITE);
+ igt_subtest("thread-performance-both")
+ thread_performance(READ | WRITE);
+}
diff --git a/tests/i915/gem_fenced_exec_thrash.c b/tests/i915/gem_fenced_exec_thrash.c
new file mode 100644
index 00000000..7248d310
--- /dev/null
+++ b/tests/i915/gem_fenced_exec_thrash.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <errno.h>
+
+#include "igt.h"
+
+IGT_TEST_DESCRIPTION("Test execbuf fence accounting.");
+
+#define WIDTH 1024
+#define HEIGHT 1024
+#define OBJECT_SIZE (4*WIDTH*HEIGHT)
+
+#define BATCH_SIZE 4096
+
+#define MAX_FENCES 64
+
+/*
+ * Testcase: execbuf fence accounting
+ *
+ * We had a bug where, upon reservation, buffers that were already fenced
+ * were falsely accounted as occupying a fence register even when the batch
+ * did not require one for them.
+ *
+ * We aim to exercise this by performing a sequence of fenced BLT operations
+ * with 2*num_avail_fence buffers, but alternating which half are fenced in
+ * each command.
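+ *
+ * For example, with num_fences available fences each execbuf references the
+ * same 2*num_fences buffers; exec[1] lists them in reverse order and only
+ * the first num_fences entries of each list are marked
+ * EXEC_OBJECT_NEEDS_FENCE, so the fenced half alternates between the two
+ * submissions.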
+ */
+
+static uint32_t
+tiled_bo_create (int fd)
+{
+ uint32_t handle;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+
+ gem_set_tiling(fd, handle, I915_TILING_X, WIDTH*4);
+
+ return handle;
+}
+
+static uint32_t
+batch_create (int fd)
+{
+ uint32_t buf[] = { MI_BATCH_BUFFER_END, 0 };
+ uint32_t batch_handle;
+
+ batch_handle = gem_create(fd, BATCH_SIZE);
+
+ gem_write(fd, batch_handle, 0, buf, sizeof(buf));
+
+ return batch_handle;
+}
+
+static void fill_reloc(struct drm_i915_gem_relocation_entry *reloc, uint32_t handle)
+{
+ reloc->offset = 2 * sizeof(uint32_t);
+ reloc->target_handle = handle;
+ reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc->write_domain = 0;
+}
+
+#define BUSY_LOAD (1 << 0)
+#define INTERRUPTIBLE (1 << 1)
+
+static void run_test(int fd, int num_fences, int expected_errno,
+ unsigned flags)
+{
+ struct drm_i915_gem_execbuffer2 execbuf[2];
+ struct drm_i915_gem_exec_object2 exec[2][2*MAX_FENCES+1];
+ struct drm_i915_gem_relocation_entry reloc[2*MAX_FENCES];
+
+ unsigned long count;
+ int i, n;
+
+ igt_assert(2*num_fences+1 <= ARRAY_SIZE(exec[0]));
+ igt_assert(2*num_fences <= ARRAY_SIZE(reloc));
+
+ memset(execbuf, 0, sizeof(execbuf));
+ memset(exec, 0, sizeof(exec));
+ memset(reloc, 0, sizeof(reloc));
+
+ for (n = 0; n < 2*num_fences; n++) {
+ uint32_t handle = tiled_bo_create(fd);
+ exec[1][2*num_fences - n-1].handle = exec[0][n].handle = handle;
+ fill_reloc(&reloc[n], handle);
+ }
+
+ for (i = 0; i < 2; i++) {
+ for (n = 0; n < num_fences; n++)
+ exec[i][n].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+ exec[i][2*num_fences].handle = batch_create(fd);
+ exec[i][2*num_fences].relocs_ptr = to_user_pointer(reloc);
+ exec[i][2*num_fences].relocation_count = 2*num_fences;
+
+ execbuf[i].buffers_ptr = to_user_pointer(exec[i]);
+ execbuf[i].buffer_count = 2*num_fences+1;
+ execbuf[i].batch_len = 2*sizeof(uint32_t);
+ }
+
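+ /* Ping-pong between the two execbufs until the timeout expires;
+ * BUSY_LOAD keeps the GPU busy with a spin batch and INTERRUPTIBLE
+ * injects signals around the execbuf ioctl.
+ */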
+ count = 0;
+ igt_until_timeout(2) {
+ for (i = 0; i < 2; i++) {
+ igt_spin_t *spin = NULL;
+
+ if (flags & BUSY_LOAD)
+ spin = __igt_spin_batch_new(fd);
+
+ igt_while_interruptible(flags & INTERRUPTIBLE) {
+ igt_assert_eq(__gem_execbuf(fd, &execbuf[i]),
+ -expected_errno);
+ }
+
+ igt_spin_batch_free(fd, spin);
+ gem_quiescent_gpu(fd);
+ }
+ count++;
+ }
+ igt_info("Completed %lu cycles\n", count);
+
+ /* Cleanup */
+ for (n = 0; n < 2*num_fences; n++)
+ gem_close(fd, exec[0][n].handle);
+
+ for (i = 0; i < 2; i++)
+ gem_close(fd, exec[i][2*num_fences].handle);
+}
+
+igt_main
+{
+ uint32_t devid = 0;
+ unsigned int num_fences = 0;
+ int fd = -1;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ num_fences = gem_available_fences(fd);
+ igt_assert(num_fences > 4);
+ igt_assert(num_fences <= MAX_FENCES);
+
+ devid = intel_get_drm_devid(fd);
+ }
+
+ igt_subtest("2-spare-fences")
+ run_test(fd, num_fences - 2, 0, 0);
+ for (unsigned flags = 0; flags < 4; flags++) {
+ igt_subtest_f("no-spare-fences%s%s",
+ flags & BUSY_LOAD ? "-busy" : "",
+ flags & INTERRUPTIBLE ? "-interruptible" : "")
+ run_test(fd, num_fences, 0, flags);
+ }
+ igt_subtest("too-many-fences")
+ run_test(fd, num_fences + 1, intel_gen(devid) >= 4 ? 0 : EDEADLK, 0);
+
+ igt_fixture
+ close(fd);
+}
diff --git a/tests/i915/gem_flink_basic.c b/tests/i915/gem_flink_basic.c
new file mode 100644
index 00000000..48b0b8b1
--- /dev/null
+++ b/tests/i915/gem_flink_basic.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION("Tests for flink - a way to export a gem object by name");
+
+static void
+test_flink(int fd)
+{
+ struct drm_i915_gem_create create;
+ struct drm_gem_flink flink;
+ struct drm_gem_open open_struct;
+ int ret;
+
+ igt_debug("Testing flink and open.\n");
+
+ memset(&create, 0, sizeof(create));
+ create.size = 16 * 1024;
+ ret = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
+ igt_assert_eq(ret, 0);
+
+ flink.handle = create.handle;
+ ret = ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink);
+ igt_assert_eq(ret, 0);
+
+ open_struct.name = flink.name;
+ ret = ioctl(fd, DRM_IOCTL_GEM_OPEN, &open_struct);
+ igt_assert_eq(ret, 0);
+ igt_assert(open_struct.handle != 0);
+}
+
+static void
+test_double_flink(int fd)
+{
+ struct drm_i915_gem_create create;
+ struct drm_gem_flink flink;
+ struct drm_gem_flink flink2;
+ int ret;
+
+ igt_debug("Testing repeated flink.\n");
+
+ memset(&create, 0, sizeof(create));
+ create.size = 16 * 1024;
+ ret = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
+ igt_assert_eq(ret, 0);
+
+ flink.handle = create.handle;
+ ret = ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink);
+ igt_assert_eq(ret, 0);
+
+ flink2.handle = create.handle;
+ ret = ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink2);
+ igt_assert_eq(ret, 0);
+
+ /* flinks for the same gem object share the same name */
+ igt_assert(flink2.name == flink.name);
+}
+
+static void
+test_bad_flink(int fd)
+{
+ struct drm_gem_flink flink;
+ int ret;
+
+ igt_debug("Testing error return on bad flink ioctl.\n");
+
+ flink.handle = 0x10101010;
+ ret = ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink);
+ igt_assert(ret == -1 && errno == ENOENT);
+}
+
+static void
+test_bad_open(int fd)
+{
+ struct drm_gem_open open_struct;
+ int ret;
+
+ igt_debug("Testing error return on bad open ioctl.\n");
+
+ open_struct.name = 0x10101010;
+ ret = ioctl(fd, DRM_IOCTL_GEM_OPEN, &open_struct);
+
+ igt_assert(ret == -1 && errno == ENOENT);
+}
+
+static void
+test_flink_lifetime(int fd)
+{
+ struct drm_i915_gem_create create;
+ struct drm_gem_flink flink;
+ struct drm_gem_open open_struct;
+ int ret, fd2;
+
+ igt_debug("Testing flink lifetime.\n");
+
+ fd2 = drm_open_driver(DRIVER_INTEL);
+
+ memset(&create, 0, sizeof(create));
+ create.size = 16 * 1024;
+ ret = ioctl(fd2, DRM_IOCTL_I915_GEM_CREATE, &create);
+ igt_assert_eq(ret, 0);
+
+ flink.handle = create.handle;
+ ret = ioctl(fd2, DRM_IOCTL_GEM_FLINK, &flink);
+ igt_assert_eq(ret, 0);
+
+ /* Open a second reference to the gem object with different fd */
+ open_struct.name = flink.name;
+ ret = ioctl(fd, DRM_IOCTL_GEM_OPEN, &open_struct);
+ igt_assert_eq(ret, 0);
+ igt_assert(open_struct.handle != 0);
+
+ close(fd2);
+ fd2 = drm_open_driver(DRIVER_INTEL);
+
+ /* Flink name remains valid due to the second reference */
+ open_struct.name = flink.name;
+ ret = ioctl(fd2, DRM_IOCTL_GEM_OPEN, &open_struct);
+ igt_assert_eq(ret, 0);
+ igt_assert(open_struct.handle != 0);
+}
+
+int fd;
+
+igt_main
+{
+ igt_fixture
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ igt_subtest("basic")
+ test_flink(fd);
+ igt_subtest("double-flink")
+ test_double_flink(fd);
+ igt_subtest("bad-flink")
+ test_bad_flink(fd);
+ igt_subtest("bad-open")
+ test_bad_open(fd);
+
+ /* Flink lifetime is limited to that of the gem object it points to */
+ igt_subtest("flink-lifetime")
+ test_flink_lifetime(fd);
+}
diff --git a/tests/i915/gem_flink_race.c b/tests/i915/gem_flink_race.c
new file mode 100644
index 00000000..322befe9
--- /dev/null
+++ b/tests/i915/gem_flink_race.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ */
+
+#include "igt.h"
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("Check for flink/open vs. gem close races.");
+
+/* Testcase: check for flink/open vs. gem close races
+ *
+ * The gem flink and open ioctls had a small race with gem close which could
+ * result in the flink name and its corresponding reference being leaked.
+ */
+
+/* We want this lockless and I'm too lazy to dig out an atomic library. On x86
+ * a plain volatile int works, too. */
+volatile int pls_die = 0;
+int fd;
+
+struct flink_name {
+ pthread_t thread;
+ unsigned long count;
+};
+
+static void *thread_fn_flink_name(void *p)
+{
+ struct flink_name *t = p;
+ struct drm_gem_open open_struct;
+ int ret;
+
+ while (!pls_die) {
+ memset(&open_struct, 0, sizeof(open_struct));
+
+ open_struct.name = 1;
+ ret = ioctl(fd, DRM_IOCTL_GEM_OPEN, &open_struct);
+ if (ret == 0) {
+ uint32_t name = gem_flink(fd, open_struct.handle);
+
+ igt_assert(name == 1);
+
+ gem_close(fd, open_struct.handle);
+ t->count++;
+ } else
+ igt_assert(errno == ENOENT);
+ }
+
+ return (void *)0;
+}
+
+static void test_flink_name(int timeout)
+{
+ struct flink_name *threads;
+ int r, i, num_threads;
+ unsigned long count;
+ char buf[256];
+ void *status;
+ int len;
+
+ num_threads = sysconf(_SC_NPROCESSORS_ONLN) - 1;
+ if (num_threads < 1)
+ num_threads = 1;
+
+ threads = calloc(num_threads, sizeof(*threads));
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ for (i = 0; i < num_threads; i++) {
+ r = pthread_create(&threads[i].thread, NULL,
+ thread_fn_flink_name, &threads[i]);
+ igt_assert_eq(r, 0);
+ }
+
+ count = 0;
+ igt_until_timeout(timeout) {
+ uint32_t handle;
+
+ handle = gem_create(fd, 4096);
+ gem_flink(fd, handle);
+ gem_close(fd, handle);
+
+ count++;
+ }
+
+ pls_die = 1;
+
+ len = snprintf(buf, sizeof(buf), "Completed %lu cycles with [", count);
+ for (i = 0; i < num_threads; i++) {
+ pthread_join(threads[i].thread, &status);
+ igt_assert(status == 0);
+ len += snprintf(buf + len, sizeof(buf) - len, "%lu, ", threads[i].count);
+ }
+ snprintf(buf + len - 2, sizeof(buf) - len + 2, "] races");
+ igt_info("%s\n", buf);
+
+ close(fd);
+}
+
+static void *thread_fn_flink_close(void *p)
+{
+ struct drm_gem_flink flink;
+ struct drm_gem_close close_bo;
+ uint32_t handle;
+
+ while (!pls_die) {
+ /* We want to race gem close against flink on handle one. */
+ handle = gem_create(fd, 4096);
+ if (handle != 1)
+ gem_close(fd, handle);
+
+ /* raw ioctl since we expect this to fail */
+ flink.handle = 1;
+ ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink);
+
+ close_bo.handle = 1;
+ ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
+ }
+
+ return (void *)0;
+}
+
+static void test_flink_close(void)
+{
+ pthread_t *threads;
+ int r, i, num_threads;
+ int obj_count;
+ void *status;
+ int fake;
+
+ /* Allocate exit handler fds in here so that we don't screw
+ * up the counts */
+ fake = drm_open_driver(DRIVER_INTEL);
+
+ obj_count = igt_get_stable_obj_count(fake);
+
+ num_threads = sysconf(_SC_NPROCESSORS_ONLN);
+
+ threads = calloc(num_threads, sizeof(pthread_t));
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ for (i = 0; i < num_threads; i++) {
+ r = pthread_create(&threads[i], NULL,
+ thread_fn_flink_close, NULL);
+ igt_assert_eq(r, 0);
+ }
+
+ sleep(5);
+
+ pls_die = 1;
+
+ for (i = 0; i < num_threads; i++) {
+ pthread_join(threads[i], &status);
+ igt_assert(status == 0);
+ }
+
+ close(fd);
+
+ obj_count = igt_get_stable_obj_count(fake) - obj_count;
+
+ igt_info("leaked %i objects\n", obj_count);
+
+ close(fake);
+
+ igt_assert_eq(obj_count, 0);
+}
+
+igt_main
+{
+ igt_skip_on_simulation();
+
+ igt_subtest("flink_name")
+ test_flink_name(5);
+
+ igt_subtest("flink_close")
+ test_flink_close();
+}
diff --git a/tests/i915/gem_gpgpu_fill.c b/tests/i915/gem_gpgpu_fill.c
new file mode 100644
index 00000000..dfb58165
--- /dev/null
+++ b/tests/i915/gem_gpgpu_fill.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Damien Lespiau <damien.lespiau@intel.com>
+ * Xiang, Haihao <haihao.xiang@intel.com>
+ */
+
+/*
+ * This file is a basic test for the gpgpu_fill() function, a very simple
+ * workload for the GPGPU pipeline.
+ */
+
+#include "igt.h"
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+#define WIDTH 64
+#define HEIGHT 64
+#define STRIDE (WIDTH)
+#define SIZE (HEIGHT*STRIDE)
+
+#define COLOR_C4 0xc4
+#define COLOR_4C 0x4c
+
+typedef struct {
+ int drm_fd;
+ uint32_t devid;
+ drm_intel_bufmgr *bufmgr;
+ uint8_t linear[WIDTH * HEIGHT];
+} data_t;
+
+static void scratch_buf_init(data_t *data, struct igt_buf *buf,
+ int width, int height, int stride, uint8_t color)
+{
+ drm_intel_bo *bo;
+ int i;
+
+ bo = drm_intel_bo_alloc(data->bufmgr, "", SIZE, 4096);
+ for (i = 0; i < width * height; i++)
+ data->linear[i] = color;
+ gem_write(data->drm_fd, bo->handle, 0, data->linear,
+ sizeof(data->linear));
+
+ memset(buf, 0, sizeof(*buf));
+
+ buf->bo = bo;
+ buf->stride = stride;
+ buf->tiling = I915_TILING_NONE;
+ buf->size = SIZE;
+}
+
+static void
+scratch_buf_check(data_t *data, struct igt_buf *buf, int x, int y,
+ uint8_t color)
+{
+ uint8_t val;
+
+ gem_read(data->drm_fd, buf->bo->handle, 0,
+ data->linear, sizeof(data->linear));
+ val = data->linear[y * WIDTH + x];
+ igt_assert_f(val == color,
+ "Expected 0x%02x, found 0x%02x at (%d,%d)\n",
+ color, val, x, y);
+}
+
+igt_simple_main
+{
+ data_t data = {0, };
+ struct intel_batchbuffer *batch = NULL;
+ struct igt_buf dst;
+ igt_fillfunc_t gpgpu_fill = NULL;
+ int i, j;
+
+ data.drm_fd = drm_open_driver_render(DRIVER_INTEL);
+ data.devid = intel_get_drm_devid(data.drm_fd);
+ igt_require_gem(data.drm_fd);
+
+ data.bufmgr = drm_intel_bufmgr_gem_init(data.drm_fd, 4096);
+ igt_assert(data.bufmgr);
+
+ gpgpu_fill = igt_get_gpgpu_fillfunc(data.devid);
+
+ igt_require_f(gpgpu_fill,
+ "no gpgpu-fill function\n");
+
+ batch = intel_batchbuffer_alloc(data.bufmgr, data.devid);
+ igt_assert(batch);
+
+ scratch_buf_init(&data, &dst, WIDTH, HEIGHT, STRIDE, COLOR_C4);
+
+ for (i = 0; i < WIDTH; i++) {
+ for (j = 0; j < HEIGHT; j++) {
+ scratch_buf_check(&data, &dst, i, j, COLOR_C4);
+ }
+ }
+
+ gpgpu_fill(batch,
+ &dst, 0, 0, WIDTH / 2, HEIGHT / 2,
+ COLOR_4C);
+
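+ /* Only the top-left WIDTH/2 x HEIGHT/2 rectangle should now hold
+ * COLOR_4C; everything else must still contain COLOR_C4.
+ */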
+ for (i = 0; i < WIDTH; i++) {
+ for (j = 0; j < HEIGHT; j++) {
+ if (i < WIDTH / 2 && j < HEIGHT / 2)
+ scratch_buf_check(&data, &dst, i, j, COLOR_4C);
+ else
+ scratch_buf_check(&data, &dst, i, j, COLOR_C4);
+ }
+ }
+}
diff --git a/tests/i915/gem_gtt_cpu_tlb.c b/tests/i915/gem_gtt_cpu_tlb.c
new file mode 100644
index 00000000..8ceef44c
--- /dev/null
+++ b/tests/i915/gem_gtt_cpu_tlb.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/** @file gem_gtt_cpu_tlb.c
+ *
+ * This test checks whether gtt tlbs for cpu access are correctly invalidated.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION("Check whether gtt tlbs for cpu access are correctly"
+ " invalidated.");
+
+#define OBJ_SIZE (1024*1024)
+
+#define PAGE_SIZE 4096
+
+static uint32_t
+create_bo(int fd)
+{
+ uint32_t handle;
+ uint32_t *data;
+ int i;
+
+ handle = gem_create(fd, OBJ_SIZE);
+
+ /* Fill the BO with ascending dwords starting at 0 */
+ data = gem_mmap__gtt(fd, handle, OBJ_SIZE, PROT_READ | PROT_WRITE);
+ for (i = 0; i < OBJ_SIZE/4; i++)
+ data[i] = i;
+ munmap(data, OBJ_SIZE);
+
+ return handle;
+}
+
+igt_simple_main
+{
+ int fd;
+ int i;
+ uint32_t handle;
+
+ uint32_t *ptr;
+
+ igt_skip_on_simulation();
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ handle = gem_create(fd, OBJ_SIZE);
+
+ /* touch one page */
+ ptr = gem_mmap__gtt(fd, handle, OBJ_SIZE, PROT_READ | PROT_WRITE);
+ *ptr = 0xdeadbeef;
+ munmap(ptr, OBJ_SIZE);
+
+ gem_close(fd, handle);
+
+ /* stir up the page allocator a bit. */
+ ptr = malloc(OBJ_SIZE);
+ igt_assert(ptr);
+ memset(ptr, 0x1, OBJ_SIZE);
+
+ handle = create_bo(fd);
+
+ /* Read back the whole object and check that the data comes
+ * out right.
+ */
+ gem_read(fd, handle, 0, ptr, OBJ_SIZE);
+ for (i = 0; i < OBJ_SIZE/4; i++)
+ igt_assert(ptr[i] == i);
+
+ close(fd);
+}
diff --git a/tests/i915/gem_gtt_hog.c b/tests/i915/gem_gtt_hog.c
new file mode 100644
index 00000000..ca730649
--- /dev/null
+++ b/tests/i915/gem_gtt_hog.c
@@ -0,0 +1,180 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+
+#include "drm.h"
+
+static const uint32_t canary = 0xdeadbeef;
+
+typedef struct data {
+ int fd;
+ int devid;
+ int intel_gen;
+} data_t;
+
+static double elapsed(const struct timeval *start,
+ const struct timeval *end)
+{
+ return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_usec - start->tv_usec);
+}
+
+static void busy(data_t *data, uint32_t handle, int size, int loops)
+{
+ struct drm_i915_gem_relocation_entry reloc[20];
+ struct drm_i915_gem_exec_object2 gem_exec[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_pwrite gem_pwrite;
+ struct drm_i915_gem_create create;
+ uint32_t buf[170], *b;
+ int i;
+
+ memset(reloc, 0, sizeof(reloc));
+ memset(gem_exec, 0, sizeof(gem_exec));
+ memset(&execbuf, 0, sizeof(execbuf));
+
+ b = buf;
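+ /* Emit 20 XY_COLOR_BLT fills targeting the bo, each writing the
+ * canary value as the solid colour.
+ */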
+ for (i = 0; i < 20; i++) {
+ *b++ = XY_COLOR_BLT_CMD_NOLEN |
+ ((data->intel_gen >= 8) ? 5 : 4) |
+ COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
+ *b++ = 0xf0 << 16 | 1 << 25 | 1 << 24 | 4096;
+ *b++ = 0;
+ *b++ = size >> 12 << 16 | 1024;
+ reloc[i].offset = (b - buf) * sizeof(uint32_t);
+ reloc[i].target_handle = handle;
+ reloc[i].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[i].write_domain = I915_GEM_DOMAIN_RENDER;
+ *b++ = 0;
+ if (data->intel_gen >= 8)
+ *b++ = 0;
+ *b++ = canary;
+ }
+ *b++ = MI_BATCH_BUFFER_END;
+ if ((b - buf) & 1)
+ *b++ = 0;
+
+ gem_exec[0].handle = handle;
+ gem_exec[0].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+ create.handle = 0;
+ create.size = 4096;
+ drmIoctl(data->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
+ gem_exec[1].handle = create.handle;
+ gem_exec[1].relocation_count = 20;
+ gem_exec[1].relocs_ptr = to_user_pointer(reloc);
+
+ execbuf.buffers_ptr = to_user_pointer(gem_exec);
+ execbuf.buffer_count = 2;
+ execbuf.batch_len = (b - buf) * sizeof(buf[0]);
+ execbuf.flags = 1 << 11;
+ if (HAS_BLT_RING(data->devid))
+ execbuf.flags |= I915_EXEC_BLT;
+
+ gem_pwrite.handle = gem_exec[1].handle;
+ gem_pwrite.offset = 0;
+ gem_pwrite.size = execbuf.batch_len;
+ gem_pwrite.data_ptr = to_user_pointer(buf);
+ if (drmIoctl(data->fd, DRM_IOCTL_I915_GEM_PWRITE, &gem_pwrite) == 0) {
+ while (loops--)
+ gem_execbuf(data->fd, &execbuf);
+ }
+
+ drmIoctl(data->fd, DRM_IOCTL_GEM_CLOSE, &create.handle);
+}
+
+static void run(data_t *data, int child)
+{
+ const int size = 4096 * (256 + child * child);
+ const int tiling = child % 2;
+ const int write = child % 2;
+ uint32_t handle = gem_create(data->fd, size);
+ uint32_t *ptr;
+ uint32_t x;
+
+ igt_assert(handle);
+
+ if (tiling != I915_TILING_NONE)
+ gem_set_tiling(data->fd, handle, tiling, 4096);
+
+ /* load up the unfaulted bo */
+ busy(data, handle, size, 100);
+
+ /* Note that we ignore the API and rely on the implicit
+ * set-to-gtt-domain within the fault handler.
+ */
+ if (write) {
+ ptr = gem_mmap__gtt(data->fd, handle, size,
+ PROT_READ | PROT_WRITE);
+ ptr[rand() % (size / 4)] = canary;
+ } else {
+ ptr = gem_mmap__gtt(data->fd, handle, size, PROT_READ);
+ }
+ x = ptr[rand() % (size / 4)];
+ munmap(ptr, size);
+
+ igt_assert_eq_u32(x, canary);
+}
+
+igt_simple_main
+{
+ struct timeval start, end;
+ pid_t children[64];
+ data_t data = {};
+
+ /* check for an intel gpu before going nuts. */
+ int fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ close(fd);
+
+ igt_skip_on_simulation();
+
+ data.fd = drm_open_driver(DRIVER_INTEL);
+ data.devid = intel_get_drm_devid(data.fd);
+ data.intel_gen = intel_gen(data.devid);
+
+ gettimeofday(&start, NULL);
+ igt_fork(child, ARRAY_SIZE(children))
+ run(&data, child);
+ igt_waitchildren();
+ gettimeofday(&end, NULL);
+
+ igt_info("Time to execute %zu children: %7.3fms\n",
+ ARRAY_SIZE(children), elapsed(&start, &end) / 1000);
+}
diff --git a/tests/i915/gem_gtt_speed.c b/tests/i915/gem_gtt_speed.c
new file mode 100644
index 00000000..3d726c4e
--- /dev/null
+++ b/tests/i915/gem_gtt_speed.c
@@ -0,0 +1,510 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "igt.h"
+#include "igt_x86.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include "drm.h"
+
+#define OBJECT_SIZE 16384
+
+static double elapsed(const struct timeval *start,
+ const struct timeval *end,
+ int loop)
+{
+ return (1e6*(end->tv_sec - start->tv_sec) + (end->tv_usec - start->tv_usec))/loop;
+}
+
+#if defined(__x86_64__) && !defined(__clang__)
+#pragma GCC push_options
+#pragma GCC target("sse4.1")
+#include <smmintrin.h>
+__attribute__((noinline))
+static void streaming_load(void *src, int len)
+{
+ __m128i tmp = _mm_setzero_si128(), *s = src;
+
+ igt_assert((len & 15) == 0);
+ igt_assert((((uintptr_t)src) & 15) == 0);
+
+ while (len >= 16) {
+ tmp += _mm_stream_load_si128(s++);
+ len -= 16;
+
+ }
+
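+ /* Store the accumulated value so the compiler cannot elide the
+ * streaming loads above.
+ */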
+ *(volatile __m128i *)src = tmp;
+}
+static inline unsigned x86_64_features(void)
+{
+ return igt_x86_features();
+}
+#pragma GCC pop_options
+#else
+static inline unsigned x86_64_features(void)
+{
+ return 0;
+}
+static void streaming_load(void *src, int len)
+{
+ igt_assert(!"reached");
+}
+#endif
+
+int main(int argc, char **argv)
+{
+ struct timeval start, end;
+ uint8_t *buf;
+ uint32_t handle;
+ unsigned cpu = x86_64_features();
+ int size = OBJECT_SIZE;
+ int loop, i, tiling;
+ int fd;
+
+ igt_simple_init(argc, argv);
+
+ igt_skip_on_simulation();
+
+ if (argc > 1)
+ size = atoi(argv[1]);
+ if (size == 0) {
+ igt_warn("Invalid object size specified\n");
+ return 1;
+ }
+
+ if (cpu) {
+ char str[1024];
+ igt_info("Detected cpu faatures: %s\n",
+ igt_x86_features_to_string(cpu, str));
+ }
+
+ buf = malloc(size);
+ memset(buf, 0, size);
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ handle = gem_create(fd, size);
+ igt_assert(handle);
+
+ for (tiling = I915_TILING_NONE; tiling <= I915_TILING_Y; tiling++) {
+ if (tiling != I915_TILING_NONE) {
+ igt_info("\nSetting tiling mode to %s\n",
+ tiling == I915_TILING_X ? "X" : "Y");
+ gem_set_tiling(fd, handle, tiling, 512);
+ }
+
+ if (tiling == I915_TILING_NONE) {
+ gem_set_domain(fd, handle,
+ I915_GEM_DOMAIN_CPU,
+ I915_GEM_DOMAIN_CPU);
+
+ {
+ uint32_t *base = gem_mmap__cpu(fd, handle, 0, size, PROT_READ | PROT_WRITE);
+ volatile uint32_t *ptr = base;
+ int x = 0;
+
+ for (i = 0; i < size/sizeof(*ptr); i++)
+ x += ptr[i];
+
+ /* force overly clever gcc to actually compute x */
+ ptr[0] = x;
+
+ munmap(base, size);
+
+ /* mmap read */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ base = gem_mmap__cpu(fd, handle, 0,
+ size,
+ PROT_READ | PROT_WRITE);
+ ptr = base;
+ x = 0;
+
+ for (i = 0; i < size/sizeof(*ptr); i++)
+ x += ptr[i];
+
+ /* force overly clever gcc to actually compute x */
+ ptr[0] = x;
+
+ munmap(base, size);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to read %dk through a CPU map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+ {
+ base = gem_mmap__cpu(fd, handle, 0,
+ size,
+ PROT_READ | PROT_WRITE);
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ ptr = base;
+ x = 0;
+
+ for (i = 0; i < size/sizeof(*ptr); i++)
+ x += ptr[i];
+
+ /* force overly clever gcc to actually compute x */
+ ptr[0] = x;
+
+ }
+ gettimeofday(&end, NULL);
+ munmap(base, size);
+ igt_info("Time to read %dk through a cached CPU map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+ }
+
+ /* mmap write */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ base = gem_mmap__cpu(fd, handle, 0,
+ size,
+ PROT_READ | PROT_WRITE);
+ ptr = base;
+
+ for (i = 0; i < size/sizeof(*ptr); i++)
+ ptr[i] = i;
+
+ munmap(base, size);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to write %dk through a CPU map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ base = gem_mmap__cpu(fd, handle, 0,
+ size,
+ PROT_READ | PROT_WRITE);
+ memset(base, 0, size);
+ munmap(base, size);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to clear %dk through a CPU map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ gettimeofday(&start, NULL);
+ base = gem_mmap__cpu(fd, handle, 0, size,
+ PROT_READ | PROT_WRITE);
+ for (loop = 0; loop < 1000; loop++)
+ memset(base, 0, size);
+ munmap(base, size);
+ gettimeofday(&end, NULL);
+ igt_info("Time to clear %dk through a cached CPU map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+ }
+
+ /* CPU pwrite */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++)
+ gem_write(fd, handle, 0, buf, size);
+ gettimeofday(&end, NULL);
+ igt_info("Time to pwrite %dk through the CPU: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ /* CPU pread */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++)
+ gem_read(fd, handle, 0, buf, size);
+ gettimeofday(&end, NULL);
+ igt_info("Time to pread %dk through the CPU: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+ }
+
+ /* prefault into gtt */
+ {
+ uint32_t *base = gem_mmap__gtt(fd, handle, size, PROT_READ | PROT_WRITE);
+ volatile uint32_t *ptr = base;
+ int x = 0;
+
+ for (i = 0; i < size/sizeof(*ptr); i++)
+ x += ptr[i];
+
+ /* force overly clever gcc to actually compute x */
+ ptr[0] = x;
+
+ munmap(base, size);
+ }
+ /* mmap read */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ uint32_t *base = gem_mmap__gtt(fd, handle, size, PROT_READ | PROT_WRITE);
+ volatile uint32_t *ptr = base;
+ int x = 0;
+
+ for (i = 0; i < size/sizeof(*ptr); i++)
+ x += ptr[i];
+
+ /* force overly clever gcc to actually compute x */
+ ptr[0] = x;
+
+ munmap(base, size);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to read %dk through a GTT map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ if (gem_mmap__has_wc(fd)) {
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ uint32_t *base = gem_mmap__wc(fd, handle, 0, size, PROT_READ | PROT_WRITE);
+ volatile uint32_t *ptr = base;
+ int x = 0;
+
+ for (i = 0; i < size/sizeof(*ptr); i++)
+ x += ptr[i];
+
+ /* force overly clever gcc to actually compute x */
+ ptr[0] = x;
+
+ munmap(base, size);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to read %dk through a WC map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ {
+ uint32_t *base = gem_mmap__wc(fd, handle, 0, size, PROT_READ | PROT_WRITE);
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ volatile uint32_t *ptr = base;
+ int x = 0;
+
+ for (i = 0; i < size/sizeof(*ptr); i++)
+ x += ptr[i];
+
+ /* force overly clever gcc to actually compute x */
+ ptr[0] = x;
+
+ }
+ gettimeofday(&end, NULL);
+ munmap(base, size);
+ }
+ igt_info("Time to read %dk through a cached WC map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ /* Check streaming loads from WC */
+ if (cpu & SSE4_1) {
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ uint32_t *base = gem_mmap__wc(fd, handle, 0, size, PROT_READ | PROT_WRITE);
+ streaming_load(base, size);
+
+ munmap(base, size);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to stream %dk from a WC map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ {
+ uint32_t *base = gem_mmap__wc(fd, handle, 0, size, PROT_READ | PROT_WRITE);
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++)
+ streaming_load(base, size);
+ gettimeofday(&end, NULL);
+ munmap(base, size);
+ }
+ igt_info("Time to stream %dk from a cached WC map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+ }
+ }
+
+
+ /* mmap write */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ uint32_t *base = gem_mmap__gtt(fd, handle, size, PROT_READ | PROT_WRITE);
+ volatile uint32_t *ptr = base;
+
+ for (i = 0; i < size/sizeof(*ptr); i++)
+ ptr[i] = i;
+
+ munmap(base, size);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to write %dk through a GTT map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ if (gem_mmap__has_wc(fd)) {
+ /* mmap write */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ uint32_t *base = gem_mmap__wc(fd, handle, 0, size, PROT_READ | PROT_WRITE);
+ volatile uint32_t *ptr = base;
+
+ for (i = 0; i < size/sizeof(*ptr); i++)
+ ptr[i] = i;
+
+ munmap(base, size);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to write %dk through a WC map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+ }
+
+ /* mmap clear */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ uint32_t *base = gem_mmap__gtt(fd, handle, size, PROT_READ | PROT_WRITE);
+ memset(base, 0, size);
+ munmap(base, size);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to clear %dk through a GTT map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ if (gem_mmap__has_wc(fd)) {
+ /* mmap clear */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ uint32_t *base = gem_mmap__wc(fd, handle, 0, size, PROT_READ | PROT_WRITE);
+ memset(base, 0, size);
+ munmap(base, size);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to clear %dk through a WC map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+ }
+
+ gettimeofday(&start, NULL);
+ {
+ uint32_t *base = gem_mmap__gtt(fd, handle, size, PROT_READ | PROT_WRITE);
+ for (loop = 0; loop < 1000; loop++)
+ memset(base, 0, size);
+ munmap(base, size);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to clear %dk through a cached GTT map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ if (gem_mmap__has_wc(fd)) {
+ gettimeofday(&start, NULL);
+ {
+ uint32_t *base = gem_mmap__wc(fd, handle, 0, size, PROT_READ | PROT_WRITE);
+ for (loop = 0; loop < 1000; loop++)
+ memset(base, 0, size);
+ munmap(base, size);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to clear %dk through a cached WC map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+ }
+
+ /* mmap read */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ uint32_t *base = gem_mmap__gtt(fd, handle, size, PROT_READ | PROT_WRITE);
+ volatile uint32_t *ptr = base;
+ int x = 0;
+
+ for (i = 0; i < size/sizeof(*ptr); i++)
+ x += ptr[i];
+
+ /* force overly clever gcc to actually compute x */
+ ptr[0] = x;
+
+ munmap(base, size);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to read %dk (again) through a GTT map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ if (tiling == I915_TILING_NONE) {
+ /* GTT pwrite */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++)
+ gem_write(fd, handle, 0, buf, size);
+ gettimeofday(&end, NULL);
+ igt_info("Time to pwrite %dk through the GTT: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ /* GTT pread */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++)
+ gem_read(fd, handle, 0, buf, size);
+ gettimeofday(&end, NULL);
+ igt_info("Time to pread %dk through the GTT: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ /* GTT pwrite, including clflush */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ gem_write(fd, handle, 0, buf, size);
+ gem_sync(fd, handle);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to pwrite %dk through the GTT (clflush): %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ /* GTT pread, including clflush */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ gem_sync(fd, handle);
+ gem_read(fd, handle, 0, buf, size);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to pread %dk through the GTT (clflush): %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ /* partial writes */
+ igt_info("Now partial writes.\n");
+ size /= 4;
+
+ /* partial GTT pwrite, including clflush */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ gem_write(fd, handle, 0, buf, size);
+ gem_sync(fd, handle);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to pwrite %dk through the GTT (clflush): %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ /* partial GTT pread, including clflush */
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ gem_sync(fd, handle);
+ gem_read(fd, handle, 0, buf, size);
+ }
+ gettimeofday(&end, NULL);
+ igt_info("Time to pread %dk through the GTT (clflush): %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ size *= 4;
+ }
+ }
+
+ gem_close(fd, handle);
+ close(fd);
+
+ igt_exit();
+}
diff --git a/tests/i915/gem_hang.c b/tests/i915/gem_hang.c
new file mode 100644
index 00000000..f506fc70
--- /dev/null
+++ b/tests/i915/gem_hang.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Jesse Barnes <jbarnes@virtuousgeek.org> (based on gem_bad_blit.c)
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static int bad_pipe;
+
+static void
+gpu_hang(void)
+{
+ int cmd;
+
+ cmd = bad_pipe ? MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW :
+ MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW;
+
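+ /* Waiting for a scanline window on a disabled pipe never completes,
+ * so this batch intentionally wedges the GPU.
+ */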
+ BEGIN_BATCH(6, 0);
+ /* The documentation says that the LOAD_SCAN_LINES command
+ * always comes in pairs. Don't ask me why. */
+ OUT_BATCH(MI_LOAD_SCAN_LINES_INCL | (bad_pipe << 20));
+ OUT_BATCH((0 << 16) | 2048);
+ OUT_BATCH(MI_LOAD_SCAN_LINES_INCL | (bad_pipe << 20));
+ OUT_BATCH((0 << 16) | 2048);
+ OUT_BATCH(MI_WAIT_FOR_EVENT | cmd);
+ OUT_BATCH(MI_NOOP);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_flush(batch);
+}
+
+int main(int argc, char **argv)
+{
+ int fd;
+
+ igt_simple_init(argc, argv);
+
+ igt_assert_f(argc == 2,
+ "usage: %s <disabled pipe number>\n",
+ argv[0]);
+
+ bad_pipe = atoi(argv[1]);
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+ gpu_hang();
+
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+
+ igt_exit();
+}
diff --git a/tests/i915/gem_largeobject.c b/tests/i915/gem_largeobject.c
new file mode 100644
index 00000000..518396fa
--- /dev/null
+++ b/tests/i915/gem_largeobject.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jesse Barnes <jbarnes@virtuousgeek.org>
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+/* Should take 64 pages to store the page pointers on 64 bit */
+#define OBJ_SIZE (128 * 1024 * 1024)
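+/* (128 MiB = 32768 4KiB pages; 32768 * 8-byte pointers = 256 KiB = 64 pages) */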
+
+unsigned char *data;
+
+static void
+test_large_object(int fd)
+{
+ struct drm_i915_gem_create create;
+ struct drm_i915_gem_pin pin;
+ uint32_t obj_size;
+ char *ptr;
+
+ memset(&create, 0, sizeof(create));
+ memset(&pin, 0, sizeof(pin));
+
+ if (gem_aperture_size(fd)*3/4 < OBJ_SIZE/2)
+ obj_size = OBJ_SIZE / 4;
+ else if (gem_aperture_size(fd)*3/4 < OBJ_SIZE)
+ obj_size = OBJ_SIZE / 2;
+ else
+ obj_size = OBJ_SIZE;
+ create.size = obj_size;
+ igt_info("obj size %i\n", obj_size);
+
+ igt_assert(ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create) == 0);
+
+ /* prefault */
+ ptr = gem_mmap__gtt(fd, create.handle, obj_size,
+ PROT_WRITE | PROT_READ);
+ *ptr = 0;
+
+ gem_write(fd, create.handle, 0, data, obj_size);
+
+ /* kernel should clean this up for us */
+}
+
+igt_simple_main
+{
+ int fd;
+
+ igt_skip_on_simulation();
+
+ data = malloc(OBJ_SIZE);
+ igt_assert(data);
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ test_large_object(fd);
+
+ free(data);
+}
diff --git a/tests/i915/gem_linear_blits.c b/tests/i915/gem_linear_blits.c
new file mode 100644
index 00000000..6afa4e9c
--- /dev/null
+++ b/tests/i915/gem_linear_blits.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_linear_blits.c
+ *
+ * This is a test of doing many blits, with a working set
+ * larger than the aperture size.
+ *
+ * The goal is to simply ensure the basics work.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+
+IGT_TEST_DESCRIPTION("Test doing many blits with a working set larger than the"
+ " aperture size.");
+
+#define WIDTH 512
+#define HEIGHT 512
+
+static uint32_t linear[WIDTH*HEIGHT];
+
+static void
+copy(int fd, uint32_t dst, uint32_t src)
+{
+ uint32_t batch[12];
+ struct drm_i915_gem_relocation_entry reloc[2];
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_execbuffer2 exec;
+ int i = 0;
+
+ batch[i++] = XY_SRC_COPY_BLT_CMD |
+ XY_SRC_COPY_BLT_WRITE_ALPHA |
+ XY_SRC_COPY_BLT_WRITE_RGB;
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i - 1] |= 8;
+ else
+ batch[i - 1] |= 6;
+
+ batch[i++] = (3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ WIDTH*4;
+ batch[i++] = 0; /* dst x1,y1 */
+ batch[i++] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
+ batch[i++] = 0; /* dst reloc */
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i++] = 0;
+ batch[i++] = 0; /* src x1,y1 */
+ batch[i++] = WIDTH*4;
+ batch[i++] = 0; /* src reloc */
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i++] = 0;
+ batch[i++] = MI_BATCH_BUFFER_END;
+ batch[i++] = MI_NOOP;
+
+ memset(reloc, 0, sizeof(reloc));
+ reloc[0].target_handle = dst;
+ reloc[0].delta = 0;
+ reloc[0].offset = 4 * sizeof(batch[0]);
+ reloc[0].presumed_offset = 0;
+ reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+
+ reloc[1].target_handle = src;
+ reloc[1].delta = 0;
+ reloc[1].offset = 7 * sizeof(batch[0]);
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ reloc[1].offset += sizeof(batch[0]);
+ reloc[1].presumed_offset = 0;
+ reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[1].write_domain = 0;
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = dst;
+ obj[1].handle = src;
+ obj[2].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[2].handle, 0, batch, i * sizeof(batch[0]));
+ obj[2].relocation_count = 2;
+ obj[2].relocs_ptr = to_user_pointer(reloc);
+
+ memset(&exec, 0, sizeof(exec));
+ exec.buffers_ptr = to_user_pointer(obj);
+ exec.buffer_count = 3;
+ exec.batch_len = i * sizeof(batch[0]);
+ exec.flags = gem_has_blt(fd) ? I915_EXEC_BLT : 0;
+
+ gem_execbuf(fd, &exec);
+ gem_close(fd, obj[2].handle);
+}
+
+static uint32_t
+create_bo(int fd, uint32_t val)
+{
+ uint32_t handle;
+ int i;
+
+ handle = gem_create(fd, sizeof(linear));
+
+ /* Fill the BO with dwords starting at val */
+ for (i = 0; i < WIDTH*HEIGHT; i++)
+ linear[i] = val++;
+ gem_write(fd, handle, 0, linear, sizeof(linear));
+
+ return handle;
+}
+
+static void
+check_bo(int fd, uint32_t handle, uint32_t val)
+{
+ int num_errors;
+ int i;
+
+ gem_read(fd, handle, 0, linear, sizeof(linear));
+
+ num_errors = 0;
+ for (i = 0; i < WIDTH*HEIGHT; i++) {
+ if (linear[i] != val && num_errors++ < 32)
+ igt_warn("[%08x] Expected 0x%08x, found 0x%08x (difference 0x%08x)\n",
+ i * 4, val, linear[i], val ^ linear[i]);
+ val++;
+ }
+ igt_assert_eq(num_errors, 0);
+}
+
+static void run_test(int fd, int count)
+{
+ uint32_t *handle, *start_val;
+ uint32_t start = 0;
+ int i;
+
+ igt_debug("Using %d 1MiB buffers\n", count);
+
+ handle = malloc(sizeof(uint32_t)*count*2);
+ start_val = handle + count;
+
+ for (i = 0; i < count; i++) {
+ handle[i] = create_bo(fd, start);
+ start_val[i] = start;
+ start += 1024 * 1024 / 4;
+ }
+
+ igt_debug("Verifying initialisation...\n");
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+
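+	/* Each copy below propagates the source buffer's fill pattern into the
+	 * destination, so start_val[] always tracks what check_bo() should
+	 * find in that buffer afterwards.
+	 */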
+ igt_debug("Cyclic blits, forward...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = i % count;
+ int dst = (i + 1) % count;
+
+ copy(fd, handle[dst], handle[src]);
+ start_val[dst] = start_val[src];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+
+ igt_debug("Cyclic blits, backward...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = (i + 1) % count;
+ int dst = i % count;
+
+ copy(fd, handle[dst], handle[src]);
+ start_val[dst] = start_val[src];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+
+ igt_debug("Random blits...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = random() % count;
+ int dst = random() % count;
+
+ if (src == dst)
+ continue;
+
+ copy(fd, handle[dst], handle[src]);
+ start_val[dst] = start_val[src];
+ }
+ for (i = 0; i < count; i++) {
+ check_bo(fd, handle[i], start_val[i]);
+ gem_close(fd, handle[i]);
+ }
+
+ free(handle);
+}
+
+#define MAX_32b ((1ull << 32) - 4096)
+
+int main(int argc, char **argv)
+{
+ int fd = 0;
+
+ igt_subtest_init(argc, argv);
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ }
+
+ igt_subtest("basic")
+ run_test(fd, 2);
+
+ igt_subtest("normal") {
+ uint64_t count;
+
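+		/* Use 1.5x the (32-bit capped) aperture size, counted in
+		 * 1 MiB buffers (each BO is WIDTH*HEIGHT*4 = 1 MiB).
+		 */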
+ count = gem_aperture_size(fd);
+ if (count >> 32)
+ count = MAX_32b;
+ count = 3 * count / (1024*1024) / 2;
+ igt_require(count > 1);
+ intel_require_memory(count, sizeof(linear), CHECK_RAM);
+
+ run_test(fd, count);
+ }
+
+ igt_subtest("interruptible") {
+ uint64_t count;
+
+ count = gem_aperture_size(fd);
+ if (count >> 32)
+ count = MAX_32b;
+ count = 3 * count / (1024*1024) / 2;
+ igt_require(count > 1);
+ intel_require_memory(count, sizeof(linear), CHECK_RAM);
+
+ igt_fork_signal_helper();
+ run_test(fd, count);
+ igt_stop_signal_helper();
+ }
+
+ igt_exit();
+}
diff --git a/tests/i915/gem_lut_handle.c b/tests/i915/gem_lut_handle.c
new file mode 100644
index 00000000..fec65dd8
--- /dev/null
+++ b/tests/i915/gem_lut_handle.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright © 2012,2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/* Exercises the basic execbuffer using the handle LUT interface */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION("Exercises the basic execbuffer using the handle LUT"
+ " interface.");
+
+#define BATCH_SIZE (1024*1024)
+
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define NORMAL 0
+#define USE_LUT 0x1
+#define BROKEN 0x2
+
+static int exec(int fd, uint32_t handle, unsigned int flags)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 gem_exec[1];
+ struct drm_i915_gem_relocation_entry gem_reloc[1];
+
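+	/* With HANDLE_LUT, target_handle is an index into the execbuf object
+	 * array rather than a GEM handle; index 0 is the lone batch object.
+	 * The BROKEN variants deliberately use the wrong convention so that
+	 * the kernel's lookup fails with -ENOENT.
+	 */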
+ gem_reloc[0].offset = 1024;
+ gem_reloc[0].delta = 0;
+ gem_reloc[0].target_handle =
+ !!(flags & USE_LUT) ^ !!(flags & BROKEN) ? 0 : handle;
+ gem_reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+ gem_reloc[0].write_domain = 0;
+ gem_reloc[0].presumed_offset = 0;
+
+ gem_exec[0].handle = handle;
+ gem_exec[0].relocation_count = 1;
+ gem_exec[0].relocs_ptr = to_user_pointer(gem_reloc);
+ gem_exec[0].alignment = 0;
+ gem_exec[0].offset = 0;
+ gem_exec[0].flags = 0;
+ gem_exec[0].rsvd1 = 0;
+ gem_exec[0].rsvd2 = 0;
+
+ execbuf.buffers_ptr = to_user_pointer(gem_exec);
+ execbuf.buffer_count = 1;
+ execbuf.batch_start_offset = 0;
+ execbuf.batch_len = 8;
+ execbuf.cliprects_ptr = 0;
+ execbuf.num_cliprects = 0;
+ execbuf.DR1 = 0;
+ execbuf.DR4 = 0;
+ execbuf.flags = flags & USE_LUT ? LOCAL_I915_EXEC_HANDLE_LUT : 0;
+ i915_execbuffer2_set_context_id(execbuf, 0);
+ execbuf.rsvd2 = 0;
+
+ return __gem_execbuf(fd, &execbuf);
+}
+
+static int many_exec(int fd, uint32_t batch, int num_exec, int num_reloc, unsigned flags)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 *gem_exec;
+ struct drm_i915_gem_relocation_entry *gem_reloc;
+ unsigned max_handle = batch;
+ int ret, n;
+
+ gem_exec = calloc(num_exec+1, sizeof(*gem_exec));
+ gem_reloc = calloc(num_reloc, sizeof(*gem_reloc));
+ igt_assert(gem_exec && gem_reloc);
+
+ for (n = 0; n < num_exec; n++) {
+ gem_exec[n].handle = gem_create(fd, 4096);
+ if (gem_exec[n].handle > max_handle)
+ max_handle = gem_exec[n].handle;
+ gem_exec[n].relocation_count = 0;
+ gem_exec[n].relocs_ptr = 0;
+ gem_exec[n].alignment = 0;
+ gem_exec[n].offset = 0;
+ gem_exec[n].flags = 0;
+ gem_exec[n].rsvd1 = 0;
+ gem_exec[n].rsvd2 = 0;
+ }
+
+ gem_exec[n].handle = batch;
+ gem_exec[n].relocation_count = num_reloc;
+ gem_exec[n].relocs_ptr = to_user_pointer(gem_reloc);
+
+ if (flags & USE_LUT)
+ max_handle = num_exec + 1;
+ max_handle++;
+
+ for (n = 0; n < num_reloc; n++) {
+ uint32_t target;
+
+ if (flags & BROKEN) {
+ target = -(rand() % 4096) - 1;
+ } else {
+ target = rand() % (num_exec + 1);
+ if ((flags & USE_LUT) == 0)
+ target = gem_exec[target].handle;
+ }
+
+ gem_reloc[n].offset = 1024;
+ gem_reloc[n].delta = 0;
+ gem_reloc[n].target_handle = target;
+ gem_reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
+ gem_reloc[n].write_domain = 0;
+ gem_reloc[n].presumed_offset = 0;
+ }
+
+ execbuf.buffers_ptr = to_user_pointer(gem_exec);
+ execbuf.buffer_count = num_exec + 1;
+ execbuf.batch_start_offset = 0;
+ execbuf.batch_len = 8;
+ execbuf.cliprects_ptr = 0;
+ execbuf.num_cliprects = 0;
+ execbuf.DR1 = 0;
+ execbuf.DR4 = 0;
+ execbuf.flags = flags & USE_LUT ? LOCAL_I915_EXEC_HANDLE_LUT : 0;
+ i915_execbuffer2_set_context_id(execbuf, 0);
+ execbuf.rsvd2 = 0;
+
+ ret = __gem_execbuf(fd, &execbuf);
+
+ for (n = 0; n < num_exec; n++)
+ gem_close(fd, gem_exec[n].handle);
+
+ free(gem_exec);
+ free(gem_reloc);
+
+ return ret;
+}
+
+#define fail(x) igt_assert((x) == -ENOENT)
+#define pass(x) igt_assert((x) == 0)
+
+igt_simple_main
+{
+ uint32_t batch[2] = {MI_BATCH_BUFFER_END};
+ uint32_t handle;
+ int fd, i;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, batch, sizeof(batch));
+
+ do_or_die(exec(fd, handle, NORMAL));
+ fail(exec(fd, handle, BROKEN));
+
+ igt_skip_on(exec(fd, handle, USE_LUT));
+
+ do_or_die(exec(fd, handle, USE_LUT));
+ fail(exec(fd, handle, USE_LUT | BROKEN));
+
+ for (i = 2; i <= SLOW_QUICK(65536, 8); i *= 2) {
+ if (many_exec(fd, handle, i+1, i+1, NORMAL) == -ENOSPC)
+ break;
+
+ pass(many_exec(fd, handle, i-1, i-1, NORMAL));
+ pass(many_exec(fd, handle, i-1, i, NORMAL));
+ pass(many_exec(fd, handle, i-1, i+1, NORMAL));
+ pass(many_exec(fd, handle, i, i-1, NORMAL));
+ pass(many_exec(fd, handle, i, i, NORMAL));
+ pass(many_exec(fd, handle, i, i+1, NORMAL));
+ pass(many_exec(fd, handle, i+1, i-1, NORMAL));
+ pass(many_exec(fd, handle, i+1, i, NORMAL));
+ pass(many_exec(fd, handle, i+1, i+1, NORMAL));
+
+ fail(many_exec(fd, handle, i-1, i-1, NORMAL | BROKEN));
+ fail(many_exec(fd, handle, i-1, i, NORMAL | BROKEN));
+ fail(many_exec(fd, handle, i-1, i+1, NORMAL | BROKEN));
+ fail(many_exec(fd, handle, i, i-1, NORMAL | BROKEN));
+ fail(many_exec(fd, handle, i, i, NORMAL | BROKEN));
+ fail(many_exec(fd, handle, i, i+1, NORMAL | BROKEN));
+ fail(many_exec(fd, handle, i+1, i-1, NORMAL | BROKEN));
+ fail(many_exec(fd, handle, i+1, i, NORMAL | BROKEN));
+ fail(many_exec(fd, handle, i+1, i+1, NORMAL | BROKEN));
+
+ pass(many_exec(fd, handle, i-1, i-1, USE_LUT));
+ pass(many_exec(fd, handle, i-1, i, USE_LUT));
+ pass(many_exec(fd, handle, i-1, i+1, USE_LUT));
+ pass(many_exec(fd, handle, i, i-1, USE_LUT));
+ pass(many_exec(fd, handle, i, i, USE_LUT));
+ pass(many_exec(fd, handle, i, i+1, USE_LUT));
+ pass(many_exec(fd, handle, i+1, i-1, USE_LUT));
+ pass(many_exec(fd, handle, i+1, i, USE_LUT));
+ pass(many_exec(fd, handle, i+1, i+1, USE_LUT));
+
+ fail(many_exec(fd, handle, i-1, i-1, USE_LUT | BROKEN));
+ fail(many_exec(fd, handle, i-1, i, USE_LUT | BROKEN));
+ fail(many_exec(fd, handle, i-1, i+1, USE_LUT | BROKEN));
+ fail(many_exec(fd, handle, i, i-1, USE_LUT | BROKEN));
+ fail(many_exec(fd, handle, i, i, USE_LUT | BROKEN));
+ fail(many_exec(fd, handle, i, i+1, USE_LUT | BROKEN));
+ fail(many_exec(fd, handle, i+1, i-1, USE_LUT | BROKEN));
+ fail(many_exec(fd, handle, i+1, i, USE_LUT | BROKEN));
+ fail(many_exec(fd, handle, i+1, i+1, USE_LUT | BROKEN));
+ }
+}
diff --git a/tests/i915/gem_madvise.c b/tests/i915/gem_madvise.c
new file mode 100644
index 00000000..729a4d33
--- /dev/null
+++ b/tests/i915/gem_madvise.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION("Checks that the kernel reports EFAULT when trying to use"
+ " purged bo.");
+
+#define OBJECT_SIZE (1024*1024)
+
+/* Testcase: checks that the kernel reports EFAULT when trying to use a purged
+ * bo.
+ */
+
+static jmp_buf jmp;
+
+static void __attribute__((noreturn)) sigtrap(int sig)
+{
+ longjmp(jmp, sig);
+}
+
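+/* Touching the mmap of a purged object is expected to raise SIGBUS; the
+ * handler above longjmps back into the test so it can assert that the fault
+ * actually occurred instead of crashing.
+ */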
+static void
+dontneed_before_mmap(void)
+{
+ int fd = drm_open_driver(DRIVER_INTEL);
+ uint32_t handle;
+ char *ptr;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+ gem_madvise(fd, handle, I915_MADV_DONTNEED);
+ ptr = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+ close(fd);
+
+ signal(SIGSEGV, sigtrap);
+ signal(SIGBUS, sigtrap);
+ switch (setjmp(jmp)) {
+ case SIGBUS:
+ break;
+ case 0:
+ *ptr = 0;
+ default:
+ igt_assert(!"reached");
+ break;
+ }
+ munmap(ptr, OBJECT_SIZE);
+ signal(SIGBUS, SIG_DFL);
+ signal(SIGSEGV, SIG_DFL);
+}
+
+static void
+dontneed_after_mmap(void)
+{
+ int fd = drm_open_driver(DRIVER_INTEL);
+ uint32_t handle;
+ char *ptr;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+ ptr = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+ igt_assert(ptr);
+ gem_madvise(fd, handle, I915_MADV_DONTNEED);
+ close(fd);
+
+ signal(SIGBUS, sigtrap);
+ switch (setjmp(jmp)) {
+ case SIGBUS:
+ break;
+ case 0:
+ *ptr = 0;
+ default:
+ igt_assert(!"reached");
+ break;
+ }
+ munmap(ptr, OBJECT_SIZE);
+ signal(SIGBUS, SIG_DFL);
+}
+
+static void
+dontneed_before_pwrite(void)
+{
+ int fd = drm_open_driver(DRIVER_INTEL);
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+ uint32_t handle;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+ gem_madvise(fd, handle, I915_MADV_DONTNEED);
+
+ igt_assert_eq(__gem_write(fd, handle, 0, &bbe, sizeof(bbe)), -EFAULT);
+
+ close(fd);
+}
+
+static void
+dontneed_before_exec(void)
+{
+ int fd = drm_open_driver(DRIVER_INTEL);
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 exec;
+ uint32_t buf[] = { MI_BATCH_BUFFER_END, 0 };
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ memset(&exec, 0, sizeof(exec));
+
+ exec.handle = gem_create(fd, OBJECT_SIZE);
+ gem_write(fd, exec.handle, 0, buf, sizeof(buf));
+ gem_madvise(fd, exec.handle, I915_MADV_DONTNEED);
+
+ execbuf.buffers_ptr = to_user_pointer(&exec);
+ execbuf.buffer_count = 1;
+ execbuf.batch_len = sizeof(buf);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT);
+
+ close(fd);
+}
+
+igt_main
+{
+ igt_skip_on_simulation();
+
+ igt_subtest("dontneed-before-mmap")
+ dontneed_before_mmap();
+
+ igt_subtest("dontneed-after-mmap")
+ dontneed_after_mmap();
+
+ igt_subtest("dontneed-before-pwrite")
+ dontneed_before_pwrite();
+
+ igt_subtest("dontneed-before-exec")
+ dontneed_before_exec();
+}
diff --git a/tests/i915/gem_media_fill.c b/tests/i915/gem_media_fill.c
new file mode 100644
index 00000000..109af129
--- /dev/null
+++ b/tests/i915/gem_media_fill.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Damien Lespiau <damien.lespiau@intel.com>
+ * Xiang, Haihao <haihao.xiang@intel.com>
+ */
+
+/*
+ * This file is a basic test for the media_fill() function, a very simple
+ * workload for the Media pipeline.
+ */
+
+#include "igt.h"
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("Basic test for the media_fill() function, a very simple"
+ " workload for the Media pipeline.");
+
+#define WIDTH 64
+#define STRIDE (WIDTH)
+#define HEIGHT 64
+#define SIZE (HEIGHT*STRIDE)
+
+#define COLOR_C4 0xc4
+#define COLOR_4C 0x4c
+
+typedef struct {
+ int drm_fd;
+ uint32_t devid;
+ drm_intel_bufmgr *bufmgr;
+ uint8_t linear[WIDTH * HEIGHT];
+} data_t;
+
+static void scratch_buf_init(data_t *data, struct igt_buf *buf,
+ int width, int height, int stride, uint8_t color)
+{
+ drm_intel_bo *bo;
+ int i;
+
+ bo = drm_intel_bo_alloc(data->bufmgr, "", SIZE, 4096);
+ for (i = 0; i < width * height; i++)
+ data->linear[i] = color;
+ gem_write(data->drm_fd, bo->handle, 0, data->linear,
+ sizeof(data->linear));
+
+ memset(buf, 0, sizeof(*buf));
+
+ buf->bo = bo;
+ buf->stride = stride;
+ buf->tiling = I915_TILING_NONE;
+ buf->size = SIZE;
+}
+
+static void
+scratch_buf_check(data_t *data, struct igt_buf *buf, int x, int y,
+ uint8_t color)
+{
+ uint8_t val;
+
+ gem_read(data->drm_fd, buf->bo->handle, 0,
+ data->linear, sizeof(data->linear));
+ val = data->linear[y * WIDTH + x];
+ igt_assert_f(val == color,
+ "Expected 0x%02x, found 0x%02x at (%d,%d)\n",
+ color, val, x, y);
+}
+
+igt_simple_main
+{
+ data_t data = {0, };
+ struct intel_batchbuffer *batch = NULL;
+ struct igt_buf dst;
+ igt_fillfunc_t media_fill = NULL;
+ int i, j;
+
+ data.drm_fd = drm_open_driver_render(DRIVER_INTEL);
+ igt_require_gem(data.drm_fd);
+
+ data.devid = intel_get_drm_devid(data.drm_fd);
+
+ data.bufmgr = drm_intel_bufmgr_gem_init(data.drm_fd, 4096);
+ igt_assert(data.bufmgr);
+
+ media_fill = igt_get_media_fillfunc(data.devid);
+
+ igt_require_f(media_fill,
+ "no media-fill function\n");
+
+ batch = intel_batchbuffer_alloc(data.bufmgr, data.devid);
+ igt_assert(batch);
+
+ scratch_buf_init(&data, &dst, WIDTH, HEIGHT, STRIDE, COLOR_C4);
+
+ for (i = 0; i < WIDTH; i++) {
+ for (j = 0; j < HEIGHT; j++) {
+ scratch_buf_check(&data, &dst, i, j, COLOR_C4);
+ }
+ }
+
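+	/* Fill only the top-left WIDTH/2 x HEIGHT/2 quadrant with COLOR_4C;
+	 * the second pass below expects the rest of the surface to still hold
+	 * COLOR_C4.
+	 */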
+ media_fill(batch,
+ &dst, 0, 0, WIDTH / 2, HEIGHT / 2,
+ COLOR_4C);
+
+ for (i = 0; i < WIDTH; i++) {
+ for (j = 0; j < HEIGHT; j++) {
+ if (i < WIDTH / 2 && j < HEIGHT / 2)
+ scratch_buf_check(&data, &dst, i, j, COLOR_4C);
+ else
+ scratch_buf_check(&data, &dst, i, j, COLOR_C4);
+ }
+ }
+}
diff --git a/tests/i915/gem_mmap.c b/tests/i915/gem_mmap.c
new file mode 100644
index 00000000..0ed15878
--- /dev/null
+++ b/tests/i915/gem_mmap.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+#define OBJECT_SIZE 16384
+#define PAGE_SIZE 4096
+int fd;
+
+static void
+test_huge_bo(int huge)
+{
+ uint64_t huge_object_size, last_offset, i;
+ unsigned check = CHECK_RAM;
+ char *ptr_cpu;
+ char *cpu_pattern;
+ uint32_t bo;
+ int loop;
+
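+	/* Pick the object size: -1 fits in half the mappable aperture, 0 just
+	 * exceeds the mappable aperture, 1 exceeds the whole GTT, and 2
+	 * exceeds RAM so the object must also be swapped.
+	 */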
+ switch (huge) {
+ case -1:
+ huge_object_size = gem_mappable_aperture_size() / 2;
+ break;
+ case 0:
+ huge_object_size = gem_mappable_aperture_size() + PAGE_SIZE;
+ break;
+ case 1:
+ huge_object_size = gem_aperture_size(fd) + PAGE_SIZE;
+ break;
+ case 2:
+ huge_object_size = (intel_get_total_ram_mb() + 1) << 20;
+ check |= CHECK_SWAP;
+ break;
+ default:
+ return;
+ }
+ intel_require_memory(1, huge_object_size, check);
+
+ last_offset = huge_object_size - PAGE_SIZE;
+
+ cpu_pattern = malloc(PAGE_SIZE);
+ igt_assert(cpu_pattern);
+ for (i = 0; i < PAGE_SIZE; i++)
+ cpu_pattern[i] = i;
+
+ bo = gem_create(fd, huge_object_size);
+
+ /* Obtain CPU mapping for the object. */
+ ptr_cpu = __gem_mmap__cpu(fd, bo, 0, huge_object_size,
+ PROT_READ | PROT_WRITE);
+ igt_require(ptr_cpu);
+ gem_set_domain(fd, bo, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ gem_close(fd, bo);
+
+ igt_debug("Exercising %'llu bytes\n", (long long)huge_object_size);
+
+ loop = 0;
+ do {
+ /* Write first page through the mapping and
+ * assert reading it back works.
+ */
+ memcpy(ptr_cpu, cpu_pattern, PAGE_SIZE);
+ igt_assert(memcmp(ptr_cpu, cpu_pattern, PAGE_SIZE) == 0);
+ memset(ptr_cpu, 0xcc, PAGE_SIZE);
+
+ /* Write last page through the mapping and
+ * assert reading it back works.
+ */
+ memcpy(ptr_cpu + last_offset, cpu_pattern, PAGE_SIZE);
+ igt_assert(memcmp(ptr_cpu + last_offset, cpu_pattern, PAGE_SIZE) == 0);
+ memset(ptr_cpu + last_offset, 0xcc, PAGE_SIZE);
+
+ /* Cross check that accessing two simultaneous pages works. */
+ igt_assert(memcmp(ptr_cpu, ptr_cpu + last_offset, PAGE_SIZE) == 0);
+
+ /* Force every page to be faulted and retest */
+ for (i = 0; i < huge_object_size; i += 4096)
+ ptr_cpu[i] = i >> 12;
+ } while (loop++ == 0);
+
+ munmap(ptr_cpu, huge_object_size);
+ free(cpu_pattern);
+}
+
+igt_main
+{
+ struct drm_i915_gem_mmap arg;
+ uint8_t expected[OBJECT_SIZE];
+ uint8_t buf[OBJECT_SIZE];
+ uint8_t *addr;
+ int ret;
+
+ igt_fixture
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ igt_subtest("bad-object") {
+ memset(&arg, 0, sizeof(arg));
+ arg.handle = 0x10101010;
+ arg.offset = 0;
+ arg.size = 4096;
+ ret = ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg);
+ igt_assert(ret == -1 && errno == ENOENT);
+ }
+
+ igt_subtest("basic") {
+ arg.handle = gem_create(fd, OBJECT_SIZE);
+ arg.offset = 0;
+ arg.size = OBJECT_SIZE;
+ ret = ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg);
+ igt_assert(ret == 0);
+ addr = from_user_pointer(arg.addr_ptr);
+
+ igt_info("Testing contents of newly created object.\n");
+ memset(expected, 0, sizeof(expected));
+ igt_assert(memcmp(addr, expected, sizeof(expected)) == 0);
+
+ igt_info("Testing coherency of writes and mmap reads.\n");
+ memset(buf, 0, sizeof(buf));
+ memset(buf + 1024, 0x01, 1024);
+ memset(expected + 1024, 0x01, 1024);
+ gem_write(fd, arg.handle, 0, buf, OBJECT_SIZE);
+ igt_assert(memcmp(buf, addr, sizeof(buf)) == 0);
+
+ igt_info("Testing that mapping stays after close\n");
+ gem_close(fd, arg.handle);
+ igt_assert(memcmp(buf, addr, sizeof(buf)) == 0);
+
+ igt_info("Testing unmapping\n");
+ munmap(addr, OBJECT_SIZE);
+ }
+
+ igt_subtest("short-mmap") {
+ igt_assert(OBJECT_SIZE > 4096);
+ arg.handle = gem_create(fd, OBJECT_SIZE);
+ addr = gem_mmap__cpu(fd, arg.handle, 0, 4096, PROT_WRITE);
+ memset(addr, 0, 4096);
+ munmap(addr, 4096);
+ gem_close(fd, arg.handle);
+ }
+
+ igt_subtest("basic-small-bo")
+ test_huge_bo(-1);
+ igt_subtest("big-bo")
+ test_huge_bo(0);
+ igt_subtest("huge-bo")
+ test_huge_bo(1);
+ igt_subtest("swap-bo")
+ test_huge_bo(2);
+
+ igt_fixture
+ close(fd);
+}
diff --git a/tests/i915/gem_mmap_gtt.c b/tests/i915/gem_mmap_gtt.c
new file mode 100644
index 00000000..f6353555
--- /dev/null
+++ b/tests/i915/gem_mmap_gtt.c
@@ -0,0 +1,901 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+#include "igt.h"
+#include "igt_x86.h"
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+
+static int OBJECT_SIZE = 16*1024*1024;
+
+static void
+set_domain_gtt(int fd, uint32_t handle)
+{
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+}
+
+static void *
+mmap_bo(int fd, uint32_t handle)
+{
+ void *ptr;
+
+ ptr = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+
+ return ptr;
+}
+
+static void *
+create_pointer(int fd)
+{
+ uint32_t handle;
+ void *ptr;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+
+ ptr = mmap_bo(fd, handle);
+
+ gem_close(fd, handle);
+
+ return ptr;
+}
+
+static void
+test_access(int fd)
+{
+ uint32_t handle, flink, handle2;
+ struct drm_i915_gem_mmap_gtt mmap_arg;
+ int fd2;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+ igt_assert(handle);
+
+ fd2 = drm_open_driver(DRIVER_INTEL);
+
+ /* Check that fd1 can mmap. */
+ mmap_arg.handle = handle;
+ do_ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
+
+	igt_assert(mmap64(0, OBJECT_SIZE, PROT_READ | PROT_WRITE,
+			  MAP_SHARED, fd, mmap_arg.offset) != MAP_FAILED);
+
+ /* Check that the same offset on the other fd doesn't work. */
+ igt_assert(mmap64(0, OBJECT_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd2, mmap_arg.offset) == MAP_FAILED);
+ igt_assert(errno == EACCES);
+
+ flink = gem_flink(fd, handle);
+ igt_assert(flink);
+ handle2 = gem_open(fd2, flink);
+ igt_assert(handle2);
+
+	/* Recheck that the same offset now works on the other fd,
+	 * after the object has been flinked and opened there. */
+	igt_assert(mmap64(0, OBJECT_SIZE, PROT_READ | PROT_WRITE,
+			  MAP_SHARED, fd2, mmap_arg.offset) != MAP_FAILED);
+}
+
+static void
+test_short(int fd)
+{
+ struct drm_i915_gem_mmap_gtt mmap_arg;
+ int pages, p;
+
+ mmap_arg.handle = gem_create(fd, OBJECT_SIZE);
+ igt_assert(mmap_arg.handle);
+
+ do_ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
+ for (pages = 1; pages <= OBJECT_SIZE / PAGE_SIZE; pages <<= 1) {
+ uint8_t *r, *w;
+
+ w = mmap64(0, pages * PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, mmap_arg.offset);
+ igt_assert(w != MAP_FAILED);
+
+ r = mmap64(0, pages * PAGE_SIZE, PROT_READ,
+ MAP_SHARED, fd, mmap_arg.offset);
+ igt_assert(r != MAP_FAILED);
+
+ for (p = 0; p < pages; p++) {
+ w[p*PAGE_SIZE] = r[p*PAGE_SIZE];
+ w[p*PAGE_SIZE+(PAGE_SIZE-1)] =
+ r[p*PAGE_SIZE+(PAGE_SIZE-1)];
+ }
+
+ munmap(r, pages * PAGE_SIZE);
+ munmap(w, pages * PAGE_SIZE);
+ }
+ gem_close(fd, mmap_arg.handle);
+}
+
+static void
+test_copy(int fd)
+{
+ void *src, *dst;
+
+ /* copy from a fresh src to fresh dst to force pagefault on both */
+ src = create_pointer(fd);
+ dst = create_pointer(fd);
+
+ memcpy(dst, src, OBJECT_SIZE);
+ memcpy(src, dst, OBJECT_SIZE);
+
+ munmap(dst, OBJECT_SIZE);
+ munmap(src, OBJECT_SIZE);
+}
+
+enum test_read_write {
+ READ_BEFORE_WRITE,
+ READ_AFTER_WRITE,
+};
+
+static void
+test_read_write(int fd, enum test_read_write order)
+{
+ uint32_t handle;
+ void *ptr;
+ volatile uint32_t val = 0;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+
+ ptr = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+
+ if (order == READ_BEFORE_WRITE) {
+ val = *(uint32_t *)ptr;
+ *(uint32_t *)ptr = val;
+ } else {
+ *(uint32_t *)ptr = val;
+ val = *(uint32_t *)ptr;
+ }
+
+ gem_close(fd, handle);
+ munmap(ptr, OBJECT_SIZE);
+}
+
+static void
+test_read_write2(int fd, enum test_read_write order)
+{
+ uint32_t handle;
+ void *r, *w;
+ volatile uint32_t val = 0;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+
+ r = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_READ);
+
+ w = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+
+ if (order == READ_BEFORE_WRITE) {
+ val = *(uint32_t *)r;
+ *(uint32_t *)w = val;
+ } else {
+ *(uint32_t *)w = val;
+ val = *(uint32_t *)r;
+ }
+
+ gem_close(fd, handle);
+ munmap(r, OBJECT_SIZE);
+ munmap(w, OBJECT_SIZE);
+}
+
+static void
+test_write(int fd)
+{
+ void *src;
+ uint32_t dst;
+
+ /* copy from a fresh src to fresh dst to force pagefault on both */
+ src = create_pointer(fd);
+ dst = gem_create(fd, OBJECT_SIZE);
+
+ gem_write(fd, dst, 0, src, OBJECT_SIZE);
+
+ gem_close(fd, dst);
+ munmap(src, OBJECT_SIZE);
+}
+
+static void
+test_wc(int fd)
+{
+ unsigned long gtt_reads, gtt_writes, cpu_writes;
+ uint32_t handle;
+ void *gtt, *cpu;
+
+ handle = gem_create(fd, 4096);
+ cpu = gem_mmap__cpu(fd, handle, 0, 4096, PROT_READ | PROT_WRITE);
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ gem_close(fd, handle);
+
+ handle = gem_create(fd, 4096);
+ gtt = gem_mmap__gtt(fd, handle, 4096, PROT_READ | PROT_WRITE);
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ gem_close(fd, handle);
+
+ gtt_reads = 0;
+ igt_for_milliseconds(200) {
+ memcpy(cpu, gtt, 4096);
+ gtt_reads++;
+ }
+ igt_debug("%lu GTT reads in 200us\n", gtt_reads);
+
+ gtt_writes = 0;
+ igt_for_milliseconds(200) {
+ memcpy(gtt, cpu, 4096);
+ gtt_writes++;
+ }
+ igt_debug("%lu GTT writes in 200us\n", gtt_writes);
+
+ if (igt_setup_clflush()) {
+ cpu_writes = 0;
+ igt_for_milliseconds(200) {
+ igt_clflush_range(cpu, 4096);
+ cpu_writes++;
+ }
+ igt_debug("%lu CPU writes in 200us\n", cpu_writes);
+ } else
+ cpu_writes = gtt_writes;
+
+ munmap(cpu, 4096);
+ munmap(gtt, 4096);
+
+ igt_assert_f(gtt_writes > 2*gtt_reads,
+ "Write-Combined writes are expected to be much faster than reads: read=%.2fMiB/s, write=%.2fMiB/s\n",
+ 5*gtt_reads/256., 5*gtt_writes/256.);
+
+ igt_assert_f(gtt_writes > cpu_writes/2,
+ "Write-Combined writes are expected to be roughly equivalent to WB writes: WC (gtt)=%.2fMiB/s, WB (cpu)=%.2fMiB/s\n",
+ 5*gtt_writes/256., 5*cpu_writes/256.);
+}
+
+static void
+test_write_gtt(int fd)
+{
+ uint32_t dst;
+ char *dst_gtt;
+ void *src;
+
+ dst = gem_create(fd, OBJECT_SIZE);
+
+ /* prefault object into gtt */
+ dst_gtt = mmap_bo(fd, dst);
+ set_domain_gtt(fd, dst);
+ memset(dst_gtt, 0, OBJECT_SIZE);
+ munmap(dst_gtt, OBJECT_SIZE);
+
+ src = create_pointer(fd);
+
+ gem_write(fd, dst, 0, src, OBJECT_SIZE);
+
+ gem_close(fd, dst);
+ munmap(src, OBJECT_SIZE);
+}
+
+static bool is_coherent(int i915)
+{
+ int val = 1; /* by default, we assume GTT is coherent, hence the test */
+ struct drm_i915_getparam gp = {
+		.param = 52, /* GTT_COHERENT */
+		.value = &val,
+ };
+
+ ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+ return val;
+}
+
+static void
+test_coherency(int fd)
+{
+ uint32_t handle;
+ uint32_t *gtt, *cpu;
+ int i;
+
+ igt_require(is_coherent(fd));
+ igt_require(igt_setup_clflush());
+
+ handle = gem_create(fd, OBJECT_SIZE);
+
+ gtt = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+ cpu = gem_mmap__cpu(fd, handle, 0, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+ set_domain_gtt(fd, handle);
+
+ /* On byt/bsw/bxt this detects an interesting behaviour where the
+ * CPU cannot flush the iobar and so the read may bypass the write.
+ * https://bugs.freedesktop.org/show_bug.cgi?id=94314
+ */
+ for (i = 0; i < OBJECT_SIZE / 64; i++) {
+ int x = 16*i + (i%16);
+ gtt[x] = i;
+ igt_clflush_range(&cpu[x], sizeof(cpu[x]));
+ igt_assert_eq(cpu[x], i);
+ }
+
+ munmap(cpu, OBJECT_SIZE);
+ munmap(gtt, OBJECT_SIZE);
+ gem_close(fd, handle);
+}
+
+static void
+test_clflush(int fd)
+{
+ uint32_t handle;
+ uint32_t *gtt;
+
+ igt_require(igt_setup_clflush());
+
+ handle = gem_create(fd, OBJECT_SIZE);
+
+ gtt = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+ set_domain_gtt(fd, handle);
+
+ igt_clflush_range(gtt, OBJECT_SIZE);
+
+ munmap(gtt, OBJECT_SIZE);
+ gem_close(fd, handle);
+}
+
+static void
+test_hang(int fd)
+{
+ igt_hang_t hang;
+ uint32_t patterns[] = {
+ 0, 0xaaaaaaaa, 0x55555555, 0xcccccccc,
+ };
+ uint32_t *gtt[3];
+ int last_pattern = 0;
+ int next_pattern = 1;
+ int i;
+
+ for (i = I915_TILING_NONE; i <= I915_TILING_Y; i++) {
+ uint32_t handle;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+ gem_set_tiling(fd, handle, i, 2048);
+
+ gtt[i] = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_WRITE);
+ set_domain_gtt(fd, handle);
+ gem_close(fd, handle);
+ }
+
+ hang = igt_hang_ring(fd, I915_EXEC_RENDER);
+
+ do {
+ for (i = 0; i < OBJECT_SIZE / 64; i++) {
+ int x = 16*i + (i%16);
+
+ igt_assert(gtt[0][x] == patterns[last_pattern]);
+ igt_assert(gtt[1][x] == patterns[last_pattern]);
+ igt_assert(gtt[2][x] == patterns[last_pattern]);
+
+ gtt[0][x] = patterns[next_pattern];
+ gtt[1][x] = patterns[next_pattern];
+ gtt[2][x] = patterns[next_pattern];
+ }
+
+ last_pattern = next_pattern;
+ next_pattern = (next_pattern + 1) % ARRAY_SIZE(patterns);
+ } while (gem_bo_busy(fd, hang.spin->handle));
+
+ igt_post_hang_ring(fd, hang);
+
+ munmap(gtt[0], OBJECT_SIZE);
+ munmap(gtt[1], OBJECT_SIZE);
+ munmap(gtt[2], OBJECT_SIZE);
+}
+
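+/* Negative tiling values are used by the "-odd" copy subtests to request a
+ * legal but unusual pitch (just off the 4 KiB natural alignment on gen4+).
+ */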
+static int min_tile_width(uint32_t devid, int tiling)
+{
+ if (tiling < 0) {
+ if (intel_gen(devid) >= 4)
+ return 4096 - min_tile_width(devid, -tiling);
+ else
+ return 1024;
+	}
+
+ if (intel_gen(devid) == 2)
+ return 128;
+ else if (tiling == I915_TILING_X)
+ return 512;
+ else if (IS_915(devid))
+ return 512;
+ else
+ return 128;
+}
+
+static int max_tile_width(uint32_t devid, int tiling)
+{
+ if (tiling < 0) {
+ if (intel_gen(devid) >= 4)
+ return 4096 + min_tile_width(devid, -tiling);
+ else
+ return 2048;
+ }
+
+ if (intel_gen(devid) >= 7)
+ return 256 << 10;
+ else if (intel_gen(devid) >= 4)
+ return 128 << 10;
+ else
+ return 8 << 10;
+}
+
+static bool known_swizzling(int fd, uint32_t handle)
+{
+ struct drm_i915_gem_get_tiling2 {
+ uint32_t handle;
+ uint32_t tiling_mode;
+ uint32_t swizzle_mode;
+ uint32_t phys_swizzle_mode;
+ } arg = {
+ .handle = handle,
+ };
+#define DRM_IOCTL_I915_GEM_GET_TILING2 DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling2)
+
+ if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING2, &arg))
+ return false;
+
+ return arg.phys_swizzle_mode == arg.swizzle_mode;
+}
+
+static void
+test_huge_bo(int fd, int huge, int tiling)
+{
+ uint32_t bo;
+ char *ptr;
+ char *tiled_pattern;
+ char *linear_pattern;
+ uint64_t size, last_offset;
+ uint32_t devid = intel_get_drm_devid(fd);
+ int pitch = min_tile_width(devid, tiling);
+ int i;
+
+ switch (huge) {
+ case -1:
+ size = gem_mappable_aperture_size() / 2;
+
+		/* The power-of-two fence size, natural fence
+		 * alignment, and the guard page at the end of
+		 * the GTT mean that if the entire GTT is
+		 * mappable, we usually cannot fit a tiled
+		 * object half the size of the GTT. Use a
+		 * quarter-size one instead.
+		 */
+ if (tiling &&
+ intel_gen(intel_get_drm_devid(fd)) < 4 &&
+ size >= gem_global_aperture_size(fd) / 2)
+ size /= 2;
+ break;
+ case 0:
+ size = gem_mappable_aperture_size() + PAGE_SIZE;
+ break;
+ default:
+ size = gem_global_aperture_size(fd) + PAGE_SIZE;
+ break;
+ }
+ intel_require_memory(1, size, CHECK_RAM);
+
+ last_offset = size - PAGE_SIZE;
+
+ /* Create pattern */
+ bo = gem_create(fd, PAGE_SIZE);
+ if (tiling)
+ igt_require(__gem_set_tiling(fd, bo, tiling, pitch) == 0);
+ igt_require(known_swizzling(fd, bo));
+
+ linear_pattern = gem_mmap__gtt(fd, bo, PAGE_SIZE,
+ PROT_READ | PROT_WRITE);
+ for (i = 0; i < PAGE_SIZE; i++)
+ linear_pattern[i] = i;
+ tiled_pattern = gem_mmap__cpu(fd, bo, 0, PAGE_SIZE, PROT_READ);
+
+ gem_set_domain(fd, bo, I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT, 0);
+ gem_close(fd, bo);
+
+ bo = gem_create(fd, size);
+ if (tiling)
+ igt_require(__gem_set_tiling(fd, bo, tiling, pitch) == 0);
+
+ /* Initialise first/last page through CPU mmap */
+ ptr = gem_mmap__cpu(fd, bo, 0, size, PROT_READ | PROT_WRITE);
+ memcpy(ptr, tiled_pattern, PAGE_SIZE);
+ memcpy(ptr + last_offset, tiled_pattern, PAGE_SIZE);
+ munmap(ptr, size);
+
+ /* Obtain mapping for the object through GTT. */
+ ptr = __gem_mmap__gtt(fd, bo, size, PROT_READ | PROT_WRITE);
+ igt_require_f(ptr, "Huge BO GTT mapping not supported.\n");
+
+ set_domain_gtt(fd, bo);
+
+ /* Access through GTT should still provide the CPU written values. */
+	igt_assert(memcmp(ptr, linear_pattern, PAGE_SIZE) == 0);
+ igt_assert(memcmp(ptr + last_offset, linear_pattern, PAGE_SIZE) == 0);
+
+ gem_set_tiling(fd, bo, I915_TILING_NONE, 0);
+
+	igt_assert(memcmp(ptr, tiled_pattern, PAGE_SIZE) == 0);
+ igt_assert(memcmp(ptr + last_offset, tiled_pattern, PAGE_SIZE) == 0);
+
+ munmap(ptr, size);
+
+ gem_close(fd, bo);
+ munmap(tiled_pattern, PAGE_SIZE);
+ munmap(linear_pattern, PAGE_SIZE);
+}
+
+static void copy_wc_page(void *dst, const void *src)
+{
+ igt_memcpy_from_wc(dst, src, PAGE_SIZE);
+}
+
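+/* A tile row covers 8 scanlines of the stride for X tiling and 32 for Y;
+ * test_huge_copy() rounds its working size down to whole tile rows.
+ */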
+static unsigned int tile_row_size(int tiling, unsigned int stride)
+{
+ if (tiling < 0)
+ tiling = -tiling;
+
+ return stride * (tiling == I915_TILING_Y ? 32 : 8);
+}
+
+#define rounddown(x, y) (x - (x%y))
+
+static void
+test_huge_copy(int fd, int huge, int tiling_a, int tiling_b, int ncpus)
+{
+ const uint32_t devid = intel_get_drm_devid(fd);
+ uint64_t huge_object_size, i;
+ unsigned mode = CHECK_RAM;
+
+ switch (huge) {
+ case -2:
+ huge_object_size = gem_mappable_aperture_size() / 4;
+ break;
+ case -1:
+ huge_object_size = gem_mappable_aperture_size() / 2;
+ break;
+ case 0:
+ huge_object_size = gem_mappable_aperture_size() + PAGE_SIZE;
+ break;
+ case 1:
+ huge_object_size = gem_global_aperture_size(fd) + PAGE_SIZE;
+ break;
+ default:
+ huge_object_size = (intel_get_total_ram_mb() << 19) + PAGE_SIZE;
+ mode |= CHECK_SWAP;
+ break;
+ }
+ intel_require_memory(2*ncpus, huge_object_size, mode);
+
+ igt_fork(child, ncpus) {
+ uint64_t valid_size = huge_object_size;
+ uint32_t bo;
+ char *a, *b;
+
+ bo = gem_create(fd, huge_object_size);
+ if (tiling_a) {
+ igt_require(__gem_set_tiling(fd, bo, abs(tiling_a), min_tile_width(devid, tiling_a)) == 0);
+ valid_size = rounddown(valid_size, tile_row_size(tiling_a, min_tile_width(devid, tiling_a)));
+ }
+ a = __gem_mmap__gtt(fd, bo, huge_object_size, PROT_READ | PROT_WRITE);
+ igt_require(a);
+ gem_close(fd, bo);
+
+ bo = gem_create(fd, huge_object_size);
+ if (tiling_b) {
+ igt_require(__gem_set_tiling(fd, bo, abs(tiling_b), max_tile_width(devid, tiling_b)) == 0);
+ valid_size = rounddown(valid_size, tile_row_size(tiling_b, max_tile_width(devid, tiling_b)));
+ }
+ b = __gem_mmap__gtt(fd, bo, huge_object_size, PROT_READ | PROT_WRITE);
+ igt_require(b);
+ gem_close(fd, bo);
+
+ for (i = 0; i < valid_size / PAGE_SIZE; i++) {
+ uint32_t *ptr = (uint32_t *)(a + PAGE_SIZE*i);
+ for (int j = 0; j < PAGE_SIZE/4; j++)
+ ptr[j] = i + j;
+ igt_progress("Writing a ", i, valid_size / PAGE_SIZE);
+ }
+
+ for (i = 0; i < valid_size / PAGE_SIZE; i++) {
+ uint32_t *ptr = (uint32_t *)(b + PAGE_SIZE*i);
+ for (int j = 0; j < PAGE_SIZE/4; j++)
+ ptr[j] = ~(i + j);
+ igt_progress("Writing b ", i, valid_size / PAGE_SIZE);
+ }
+
+ for (i = 0; i < valid_size / PAGE_SIZE; i++) {
+ uint32_t *A = (uint32_t *)(a + PAGE_SIZE*i);
+ uint32_t *B = (uint32_t *)(b + PAGE_SIZE*i);
+ uint32_t A_tmp[PAGE_SIZE/sizeof(uint32_t)];
+ uint32_t B_tmp[PAGE_SIZE/sizeof(uint32_t)];
+
+ copy_wc_page(A_tmp, A);
+ copy_wc_page(B_tmp, B);
+ for (int j = 0; j < PAGE_SIZE/4; j++)
+ if ((i + j) & 1)
+ A_tmp[j] = B_tmp[j];
+ else
+ B_tmp[j] = A_tmp[j];
+ memcpy(A, A_tmp, PAGE_SIZE);
+ memcpy(B, B_tmp, PAGE_SIZE);
+
+ igt_progress("Copying a<->b ", i, valid_size / PAGE_SIZE);
+ }
+
+ for (i = 0; i < valid_size / PAGE_SIZE; i++) {
+ uint32_t page[PAGE_SIZE/sizeof(uint32_t)];
+ copy_wc_page(page, a + PAGE_SIZE*i);
+ for (int j = 0; j < PAGE_SIZE/sizeof(uint32_t); j++)
+ if ((i + j) & 1)
+ igt_assert_eq_u32(page[j], ~(i + j));
+ else
+ igt_assert_eq_u32(page[j], i + j);
+ igt_progress("Checking a ", i, valid_size / PAGE_SIZE);
+ }
+ munmap(a, huge_object_size);
+
+ for (i = 0; i < valid_size / PAGE_SIZE; i++) {
+ uint32_t page[PAGE_SIZE/sizeof(uint32_t)];
+ copy_wc_page(page, b + PAGE_SIZE*i);
+ for (int j = 0; j < PAGE_SIZE/sizeof(uint32_t); j++)
+ if ((i + j) & 1)
+ igt_assert_eq_u32(page[j], ~(i + j));
+ else
+ igt_assert_eq_u32(page[j], i + j);
+ igt_progress("Checking b ", i, valid_size / PAGE_SIZE);
+ }
+ munmap(b, huge_object_size);
+ }
+ igt_waitchildren();
+}
+
+static void
+test_read(int fd)
+{
+ void *dst;
+ uint32_t src;
+
+ /* copy from a fresh src to fresh dst to force pagefault on both */
+ dst = create_pointer(fd);
+ src = gem_create(fd, OBJECT_SIZE);
+
+ gem_read(fd, src, 0, dst, OBJECT_SIZE);
+
+ gem_close(fd, src);
+ munmap(dst, OBJECT_SIZE);
+}
+
+static void
+test_write_cpu_read_gtt(int fd)
+{
+ uint32_t handle;
+ uint32_t *src, *dst;
+
+ igt_require(gem_has_llc(fd));
+
+ handle = gem_create(fd, OBJECT_SIZE);
+
+ dst = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_READ);
+
+ src = gem_mmap__cpu(fd, handle, 0, OBJECT_SIZE, PROT_WRITE);
+
+ gem_close(fd, handle);
+
+ memset(src, 0xaa, OBJECT_SIZE);
+ igt_assert(memcmp(dst, src, OBJECT_SIZE) == 0);
+
+ munmap(src, OBJECT_SIZE);
+ munmap(dst, OBJECT_SIZE);
+}
+
+struct thread_fault_concurrent {
+ pthread_t thread;
+ int id;
+ uint32_t **ptr;
+};
+
+static void *
+thread_fault_concurrent(void *closure)
+{
+ struct thread_fault_concurrent *t = closure;
+ uint32_t val = 0;
+ int n;
+
+ for (n = 0; n < 32; n++) {
+ if (n & 1)
+ *t->ptr[(n + t->id) % 32] = val;
+ else
+ val = *t->ptr[(n + t->id) % 32];
+ }
+
+ return NULL;
+}
+
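+/* Spawn 64 threads that read and write through 32 shared GTT mmaps in a
+ * staggered order so that several threads fault the same object at once.
+ */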
+static void
+test_fault_concurrent(int fd)
+{
+ uint32_t *ptr[32];
+ struct thread_fault_concurrent thread[64];
+ int n;
+
+ for (n = 0; n < 32; n++) {
+ ptr[n] = create_pointer(fd);
+ }
+
+ for (n = 0; n < 64; n++) {
+ thread[n].ptr = ptr;
+ thread[n].id = n;
+ pthread_create(&thread[n].thread, NULL, thread_fault_concurrent, &thread[n]);
+ }
+
+ for (n = 0; n < 64; n++)
+ pthread_join(thread[n].thread, NULL);
+
+ for (n = 0; n < 32; n++) {
+ munmap(ptr[n], OBJECT_SIZE);
+ }
+}
+
+static void
+run_without_prefault(int fd,
+ void (*func)(int fd))
+{
+ igt_disable_prefault();
+ func(fd);
+ igt_enable_prefault();
+}
+
+int fd;
+
+igt_main
+{
+ if (igt_run_in_simulation())
+ OBJECT_SIZE = 1 * 1024 * 1024;
+
+ igt_fixture
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ igt_subtest("basic")
+ test_access(fd);
+ igt_subtest("basic-short")
+ test_short(fd);
+ igt_subtest("basic-copy")
+ test_copy(fd);
+ igt_subtest("basic-read")
+ test_read(fd);
+ igt_subtest("basic-write")
+ test_write(fd);
+ igt_subtest("basic-write-gtt")
+ test_write_gtt(fd);
+ igt_subtest("coherency")
+ test_coherency(fd);
+ igt_subtest("clflush")
+ test_clflush(fd);
+ igt_subtest("hang")
+ test_hang(fd);
+ igt_subtest("basic-read-write")
+ test_read_write(fd, READ_BEFORE_WRITE);
+ igt_subtest("basic-write-read")
+ test_read_write(fd, READ_AFTER_WRITE);
+ igt_subtest("basic-read-write-distinct")
+ test_read_write2(fd, READ_BEFORE_WRITE);
+ igt_subtest("basic-write-read-distinct")
+ test_read_write2(fd, READ_AFTER_WRITE);
+ igt_subtest("fault-concurrent")
+ test_fault_concurrent(fd);
+ igt_subtest("basic-read-no-prefault")
+ run_without_prefault(fd, test_read);
+ igt_subtest("basic-write-no-prefault")
+ run_without_prefault(fd, test_write);
+ igt_subtest("basic-write-gtt-no-prefault")
+ run_without_prefault(fd, test_write_gtt);
+ igt_subtest("basic-write-cpu-read-gtt")
+ test_write_cpu_read_gtt(fd);
+ igt_subtest("basic-wc")
+ test_wc(fd);
+
+ igt_subtest("basic-small-bo")
+ test_huge_bo(fd, -1, I915_TILING_NONE);
+ igt_subtest("basic-small-bo-tiledX")
+ test_huge_bo(fd, -1, I915_TILING_X);
+ igt_subtest("basic-small-bo-tiledY")
+ test_huge_bo(fd, -1, I915_TILING_Y);
+
+ igt_subtest("big-bo")
+ test_huge_bo(fd, 0, I915_TILING_NONE);
+ igt_subtest("big-bo-tiledX")
+ test_huge_bo(fd, 0, I915_TILING_X);
+ igt_subtest("big-bo-tiledY")
+ test_huge_bo(fd, 0, I915_TILING_Y);
+
+ igt_subtest("huge-bo")
+ test_huge_bo(fd, 1, I915_TILING_NONE);
+ igt_subtest("huge-bo-tiledX")
+ test_huge_bo(fd, 1, I915_TILING_X);
+ igt_subtest("huge-bo-tiledY")
+ test_huge_bo(fd, 1, I915_TILING_Y);
+
+ igt_subtest_group {
+ const struct copy_size {
+ const char *prefix;
+ int size;
+ } copy_sizes[] = {
+ { "basic-small", -2 },
+ { "medium", -1 },
+ { "big", 0 },
+ { "huge", 1 },
+ { "swap", 2 },
+ { }
+ };
+ const struct copy_mode {
+ const char *suffix;
+ int tiling_x, tiling_y;
+ } copy_modes[] = {
+ { "", I915_TILING_NONE, I915_TILING_NONE},
+ { "-XY", I915_TILING_X, I915_TILING_Y},
+ { "-odd", -I915_TILING_X, -I915_TILING_Y},
+ {}
+ };
+ const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+ for (const struct copy_size *s = copy_sizes; s->prefix; s++)
+ for (const struct copy_mode *m = copy_modes; m->suffix; m++) {
+ igt_subtest_f("%s-copy%s", s->prefix, m->suffix)
+ test_huge_copy(fd,
+ s->size,
+ m->tiling_x,
+ m->tiling_y,
+ 1);
+
+ igt_subtest_f("forked-%s-copy%s", s->prefix, m->suffix)
+ test_huge_copy(fd,
+ s->size,
+ m->tiling_x,
+ m->tiling_y,
+ ncpus);
+ }
+ }
+
+ igt_fixture
+ close(fd);
+}
diff --git a/tests/i915/gem_mmap_offset_exhaustion.c b/tests/i915/gem_mmap_offset_exhaustion.c
new file mode 100644
index 00000000..8c8e3fa2
--- /dev/null
+++ b/tests/i915/gem_mmap_offset_exhaustion.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION("Checks whether the kernel handles mmap offset exhaustion"
+ " correctly.");
+
+#define OBJECT_SIZE (1024*1024)
+
+/* Testcase: checks whether the kernel handles mmap offset exhaustion correctly
+ *
+ * Currently the kernel doesn't reap the mmap offsets of purged objects, even
+ * though nothing prevents it ABI-wise and it would help to get out of corners
+ * (because drm_mm is only 32 bits wide on 32-bit archs, unfortunately).
+ *
+ * Note that on 64bit machines we have plenty of address space (because drm_mm
+ * uses unsigned long).
+ */
+
+static void
+create_and_map_bo(int fd)
+{
+ uint32_t handle;
+ char *ptr;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+
+ ptr = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+
+ /* touch it to force it into the gtt */
+ *ptr = 0;
+
+ /* but then unmap it again because we only have limited address space on
+ * 32 bit */
+ munmap(ptr, OBJECT_SIZE);
+
+ /* we happily leak objects to exhaust mmap offset space, the kernel will
+ * reap backing storage. */
+ gem_madvise(fd, handle, I915_MADV_DONTNEED);
+}
+
+igt_simple_main
+{
+ int fd, i;
+
+ igt_skip_on_simulation();
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+	/* On 32-bit archs we have 32 bits of mmap offset space (4 GiB), so try
+	 * to fit one MiB more than that: 4096 + 1 one-MiB objects. */
+ for (i = 0; i < 4096 + 1; i++)
+ create_and_map_bo(fd);
+
+ close(fd);
+}
diff --git a/tests/i915/gem_mmap_wc.c b/tests/i915/gem_mmap_wc.c
new file mode 100644
index 00000000..110883eb
--- /dev/null
+++ b/tests/i915/gem_mmap_wc.c
@@ -0,0 +1,492 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+struct local_i915_gem_mmap_v2 {
+ uint32_t handle;
+ uint32_t pad;
+ uint64_t offset;
+ uint64_t size;
+ uint64_t addr_ptr;
+ uint64_t flags;
+#define I915_MMAP_WC 0x1
+};
+#define LOCAL_IOCTL_I915_GEM_MMAP_v2 DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap_v2)
+
+static int OBJECT_SIZE = 16*1024*1024;
+
+static void set_domain(int fd, uint32_t handle)
+{
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+}
+
+static void *
+mmap_bo(int fd, uint32_t handle)
+{
+ void *ptr;
+
+ ptr = gem_mmap__wc(fd, handle, 0, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+
+ return ptr;
+}
+
+static void *
+create_pointer(int fd)
+{
+ uint32_t handle;
+ void *ptr;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+
+ ptr = mmap_bo(fd, handle);
+ set_domain(fd, handle);
+
+ gem_close(fd, handle);
+
+ return ptr;
+}
+
+static void
+test_invalid_flags(int fd)
+{
+ struct drm_i915_getparam gp;
+ struct local_i915_gem_mmap_v2 arg;
+ uint64_t flag = I915_MMAP_WC;
+ int val = -1;
+
+ memset(&arg, 0, sizeof(arg));
+ arg.handle = gem_create(fd, 4096);
+ arg.offset = 0;
+ arg.size = 4096;
+
+ memset(&gp, 0, sizeof(gp));
+ gp.param = 30; /* MMAP_VERSION */
+ gp.value = &val;
+
+ /* Do we have the new mmap_ioctl? */
+ drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+
+ if (val >= 1) {
+ /*
+ * Only MMAP_WC flag is supported in version 1, so any other
+ * flag should be rejected.
+ */
+ flag <<= 1;
+ while (flag) {
+ arg.flags = flag;
+ igt_assert(drmIoctl(fd,
+ LOCAL_IOCTL_I915_GEM_MMAP_v2,
+ &arg) == -1);
+ igt_assert_eq(errno, EINVAL);
+ flag <<= 1;
+ }
+ }
+
+ gem_close(fd, arg.handle);
+}
+
+static void
+test_copy(int fd)
+{
+ void *src, *dst;
+
+ /* copy from a fresh src to fresh dst to force pagefault on both */
+ src = create_pointer(fd);
+ dst = create_pointer(fd);
+
+ memcpy(dst, src, OBJECT_SIZE);
+ memcpy(src, dst, OBJECT_SIZE);
+
+ munmap(dst, OBJECT_SIZE);
+ munmap(src, OBJECT_SIZE);
+}
+
+enum test_read_write {
+ READ_BEFORE_WRITE,
+ READ_AFTER_WRITE,
+};
+
+static void
+test_read_write(int fd, enum test_read_write order)
+{
+ uint32_t handle;
+ void *ptr;
+ volatile uint32_t val = 0;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+ set_domain(fd, handle);
+
+ ptr = mmap_bo(fd, handle);
+ igt_assert(ptr != MAP_FAILED);
+
+ if (order == READ_BEFORE_WRITE) {
+ val = *(uint32_t *)ptr;
+ *(uint32_t *)ptr = val;
+ } else {
+ *(uint32_t *)ptr = val;
+ val = *(uint32_t *)ptr;
+ }
+
+ gem_close(fd, handle);
+ munmap(ptr, OBJECT_SIZE);
+}
+
+static void
+test_read_write2(int fd, enum test_read_write order)
+{
+ uint32_t handle;
+ void *r, *w;
+ volatile uint32_t val = 0;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+ set_domain(fd, handle);
+
+ r = gem_mmap__wc(fd, handle, 0, OBJECT_SIZE, PROT_READ);
+
+ w = gem_mmap__wc(fd, handle, 0, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+
+ if (order == READ_BEFORE_WRITE) {
+ val = *(uint32_t *)r;
+ *(uint32_t *)w = val;
+ } else {
+ *(uint32_t *)w = val;
+ val = *(uint32_t *)r;
+ }
+
+ gem_close(fd, handle);
+ munmap(r, OBJECT_SIZE);
+ munmap(w, OBJECT_SIZE);
+}
+
+static void
+test_write(int fd)
+{
+ void *src;
+ uint32_t dst;
+
+ /* copy from a fresh src to fresh dst to force pagefault on both */
+ src = create_pointer(fd);
+ dst = gem_create(fd, OBJECT_SIZE);
+
+ gem_write(fd, dst, 0, src, OBJECT_SIZE);
+
+ gem_close(fd, dst);
+ munmap(src, OBJECT_SIZE);
+}
+
+static void
+test_coherency(int fd)
+{
+ uint32_t handle;
+ uint32_t *wc, *cpu;
+ int i;
+
+ igt_require(igt_setup_clflush());
+
+ handle = gem_create(fd, OBJECT_SIZE);
+
+ wc = gem_mmap__wc(fd, handle, 0, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+ cpu = gem_mmap__cpu(fd, handle, 0, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+
+ for (i = 0; i < OBJECT_SIZE / 64; i++) {
+ int x = 16*i + (i%16);
+ wc[x] = i;
+ igt_clflush_range(&cpu[x], sizeof(cpu[x]));
+ igt_assert_eq(cpu[x], i);
+ }
+
+ munmap(cpu, OBJECT_SIZE);
+ munmap(wc, OBJECT_SIZE);
+ gem_close(fd, handle);
+}
+
+static void
+test_write_gtt(int fd)
+{
+ uint32_t dst;
+ char *dst_gtt;
+ void *src;
+
+ dst = gem_create(fd, OBJECT_SIZE);
+ set_domain(fd, dst);
+
+ /* prefault object into gtt */
+ dst_gtt = mmap_bo(fd, dst);
+ memset(dst_gtt, 0, OBJECT_SIZE);
+ munmap(dst_gtt, OBJECT_SIZE);
+
+ src = create_pointer(fd);
+
+ gem_write(fd, dst, 0, src, OBJECT_SIZE);
+
+ gem_close(fd, dst);
+ munmap(src, OBJECT_SIZE);
+}
+
+static void
+test_read(int fd)
+{
+ void *dst;
+ uint32_t src;
+
+ /* copy from a fresh src to fresh dst to force pagefault on both */
+ dst = create_pointer(fd);
+ src = gem_create(fd, OBJECT_SIZE);
+
+ gem_read(fd, src, 0, dst, OBJECT_SIZE);
+
+ gem_close(fd, src);
+ munmap(dst, OBJECT_SIZE);
+}
+
+static void
+test_close(int fd)
+{
+ uint32_t handle = gem_create(fd, OBJECT_SIZE);
+ uint8_t *ptr = mmap_bo(fd, handle);
+ int i;
+
+ memset(ptr, 0xcc, OBJECT_SIZE);
+ gem_close(fd, handle);
+ for (i = 0; i < 4096; i++)
+ igt_assert(ptr[i*4096+i] == 0xcc);
+
+ munmap(ptr, OBJECT_SIZE);
+}
+
+static void
+test_write_cpu_read_wc(int fd, int force_domain)
+{
+ uint32_t handle;
+ uint32_t *src, *dst;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+
+ dst = gem_mmap__wc(fd, handle, 0, OBJECT_SIZE, PROT_READ);
+
+ src = gem_mmap__cpu(fd, handle, 0, OBJECT_SIZE, PROT_WRITE);
+
+ memset(src, 0xaa, OBJECT_SIZE);
+ if (force_domain)
+ set_domain(fd, handle);
+ igt_assert(memcmp(dst, src, OBJECT_SIZE) == 0);
+ gem_close(fd, handle);
+
+ munmap(src, OBJECT_SIZE);
+ munmap(dst, OBJECT_SIZE);
+}
+
+static void
+test_write_gtt_read_wc(int fd)
+{
+ uint32_t handle;
+ uint32_t *src, *dst;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+ set_domain(fd, handle);
+
+ dst = gem_mmap__wc(fd, handle, 0, OBJECT_SIZE, PROT_READ);
+
+ src = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_WRITE);
+
+ memset(src, 0xaa, OBJECT_SIZE);
+ igt_assert(memcmp(dst, src, OBJECT_SIZE) == 0);
+ gem_close(fd, handle);
+
+ munmap(src, OBJECT_SIZE);
+ munmap(dst, OBJECT_SIZE);
+}
+
+static void
+test_set_cache_level(int fd)
+{
+ struct drm_mode_cursor arg;
+ struct drm_mode_card_res res;
+ uint32_t crtc[32];
+ int active_crtc = 0;
+ int n;
+
+ /* We want to trigger an old WARN in set-cache-level when
+ * it sees an unbound object in the GTT domain, following
+ * the introduction of mmap(wc).
+ */
+
+ memset(&arg, 0, sizeof(arg));
+ arg.flags = DRM_MODE_CURSOR_BO;
+ arg.width = arg.height = 64;
+ arg.handle = gem_create(fd, 64*64*4);
+ set_domain(fd, arg.handle);
+
+ /* Bind the object to the cursor to force set-cache-level(DISPLAY) */
+ memset(&res, 0, sizeof(res));
+ res.count_crtcs = 32;
+ res.crtc_id_ptr = to_user_pointer(crtc);
+ do_ioctl(fd, DRM_IOCTL_MODE_GETRESOURCES, &res);
+ for (n = 0; n < res.count_crtcs; n++) {
+ struct drm_mode_crtc mode;
+
+ memset(&mode, 0, sizeof(mode));
+ mode.crtc_id = crtc[n];
+ do_ioctl(fd, DRM_IOCTL_MODE_GETCRTC, &mode);
+
+ if (!mode.mode_valid)
+ continue;
+
+ active_crtc++;
+
+ arg.crtc_id = crtc[n];
+ do_ioctl(fd, DRM_IOCTL_MODE_CURSOR, &arg);
+ }
+
+ gem_close(fd, arg.handle);
+ igt_require(active_crtc);
+}
+
+struct thread_fault_concurrent {
+ pthread_t thread;
+ int id;
+ uint32_t **ptr;
+};
+
+static void *
+thread_fault_concurrent(void *closure)
+{
+ struct thread_fault_concurrent *t = closure;
+ uint32_t val = 0;
+ int n;
+
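+ /* Alternate writes and reads across the 32 shared mappings, offset by
+ * the thread id, so many threads fault the same objects concurrently.
+ */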
+ for (n = 0; n < 32; n++) {
+ if (n & 1)
+ *t->ptr[(n + t->id) % 32] = val;
+ else
+ val = *t->ptr[(n + t->id) % 32];
+ }
+
+ return NULL;
+}
+
+static void
+test_fault_concurrent(int fd)
+{
+ uint32_t *ptr[32];
+ struct thread_fault_concurrent thread[64];
+ int n;
+
+ for (n = 0; n < 32; n++) {
+ ptr[n] = create_pointer(fd);
+ }
+
+ for (n = 0; n < 64; n++) {
+ thread[n].ptr = ptr;
+ thread[n].id = n;
+ pthread_create(&thread[n].thread, NULL, thread_fault_concurrent, &thread[n]);
+ }
+
+ for (n = 0; n < 64; n++)
+ pthread_join(thread[n].thread, NULL);
+
+ for (n = 0; n < 32; n++) {
+ munmap(ptr[n], OBJECT_SIZE);
+ }
+}
+
+static void
+run_without_prefault(int fd,
+ void (*func)(int fd))
+{
+ igt_disable_prefault();
+ func(fd);
+ igt_enable_prefault();
+}
+
+int fd;
+
+igt_main
+{
+ if (igt_run_in_simulation())
+ OBJECT_SIZE = 1 * 1024 * 1024;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ gem_require_mmap_wc(fd);
+ }
+
+ igt_subtest("invalid-flags")
+ test_invalid_flags(fd);
+ igt_subtest("close")
+ test_close(fd);
+ igt_subtest("copy")
+ test_copy(fd);
+ igt_subtest("read")
+ test_read(fd);
+ igt_subtest("write")
+ test_write(fd);
+ igt_subtest("coherency")
+ test_coherency(fd);
+ igt_subtest("write-gtt")
+ test_write_gtt(fd);
+ igt_subtest("read-write")
+ test_read_write(fd, READ_BEFORE_WRITE);
+ igt_subtest("write-read")
+ test_read_write(fd, READ_AFTER_WRITE);
+ igt_subtest("read-write-distinct")
+ test_read_write2(fd, READ_BEFORE_WRITE);
+ igt_subtest("write-read-distinct")
+ test_read_write2(fd, READ_AFTER_WRITE);
+ igt_subtest("fault-concurrent")
+ test_fault_concurrent(fd);
+ igt_subtest("read-no-prefault")
+ run_without_prefault(fd, test_read);
+ igt_subtest("write-no-prefault")
+ run_without_prefault(fd, test_write);
+ igt_subtest("write-gtt-no-prefault")
+ run_without_prefault(fd, test_write_gtt);
+ igt_subtest("write-cpu-read-wc")
+ test_write_cpu_read_wc(fd, 1);
+ igt_subtest("write-cpu-read-wc-unflushed")
+ test_write_cpu_read_wc(fd, 0);
+ igt_subtest("write-gtt-read-wc")
+ test_write_gtt_read_wc(fd);
+ igt_subtest("set-cache-level")
+ test_set_cache_level(fd);
+
+ igt_fixture
+ close(fd);
+}
diff --git a/tests/i915/gem_mocs_settings.c b/tests/i915/gem_mocs_settings.c
new file mode 100644
index 00000000..967223f1
--- /dev/null
+++ b/tests/i915/gem_mocs_settings.c
@@ -0,0 +1,483 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/** @file gem_mocs_settings.c
+ *
+ * Check that the MOCS cache settings are valid.
+ */
+
+#include "igt.h"
+#include "igt_gt.h"
+#include "igt_perf.h"
+#include "igt_sysfs.h"
+
+#define MAX_NUMBER_MOCS_REGISTERS (64)
+enum {
+ NONE,
+ RESET,
+ RC6,
+ SUSPEND,
+ HIBERNATE,
+ MAX_MOCS_TEST_MODES
+};
+
+static const char * const test_modes[] = {
+ [NONE] = "settings",
+ [RESET] = "reset",
+ [RC6] = "rc6",
+ [SUSPEND] = "suspend",
+ [HIBERNATE] = "hibernate"
+};
+
+#define MOCS_NON_DEFAULT_CTX (1<<0)
+#define MOCS_DIRTY_VALUES (1<<1)
+#define ALL_MOCS_FLAGS (MOCS_NON_DEFAULT_CTX | \
+ MOCS_DIRTY_VALUES)
+
+#define GEN9_LNCFCMOCS0 (0xB020) /* L3 Cache Control base */
+#define GEN9_GFX_MOCS_0 (0xc800) /* Graphics MOCS base register */
+#define GEN9_MFX0_MOCS_0 (0xc900) /* Media 0 MOCS base register */
+#define GEN9_MFX1_MOCS_0 (0xcA00) /* Media 1 MOCS base register */
+#define GEN9_VEBOX_MOCS_0 (0xcB00) /* Video MOCS base register */
+#define GEN9_BLT_MOCS_0 (0xcc00) /* Blitter MOCS base register */
+
+struct mocs_entry {
+ uint32_t control_value;
+ uint16_t l3cc_value;
+};
+
+struct mocs_table {
+ uint32_t size;
+ const struct mocs_entry *table;
+};
+
+/* The first entries in the MOCS tables are defined by uABI */
+static const struct mocs_entry skylake_mocs_table[] = {
+ { 0x00000009, 0x0010 },
+ { 0x00000038, 0x0030 },
+ { 0x0000003b, 0x0030 },
+};
+
+static const struct mocs_entry dirty_skylake_mocs_table[] = {
+ { 0x00003FFF, 0x003F }, /* no snoop bit */
+ { 0x00003FFF, 0x003F },
+ { 0x00003FFF, 0x003F },
+};
+
+static const struct mocs_entry broxton_mocs_table[] = {
+ { 0x00000009, 0x0010 },
+ { 0x00000038, 0x0030 },
+ { 0x00000039, 0x0030 },
+};
+
+static const struct mocs_entry dirty_broxton_mocs_table[] = {
+ { 0x00007FFF, 0x003F },
+ { 0x00007FFF, 0x003F },
+ { 0x00007FFF, 0x003F },
+};
+
+static const uint32_t write_values[] = {
+ 0xFFFFFFFF,
+ 0xFFFFFFFF,
+ 0xFFFFFFFF,
+ 0xFFFFFFFF
+};
+
+static bool get_mocs_settings(int fd, struct mocs_table *table, bool dirty)
+{
+ uint32_t devid = intel_get_drm_devid(fd);
+ bool result = false;
+
+ if (IS_SKYLAKE(devid) || IS_KABYLAKE(devid)) {
+ if (dirty) {
+ table->size = ARRAY_SIZE(dirty_skylake_mocs_table);
+ table->table = dirty_skylake_mocs_table;
+ } else {
+ table->size = ARRAY_SIZE(skylake_mocs_table);
+ table->table = skylake_mocs_table;
+ }
+ result = true;
+ } else if (IS_BROXTON(devid)) {
+ if (dirty) {
+ table->size = ARRAY_SIZE(dirty_broxton_mocs_table);
+ table->table = dirty_broxton_mocs_table;
+ } else {
+ table->size = ARRAY_SIZE(broxton_mocs_table);
+ table->table = broxton_mocs_table;
+ }
+ result = true;
+ }
+
+ return result;
+}
+
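+/*
+ * Bits 13-14 of the execbuf flags select which BSD (video) ring to use;
+ * spelt out locally in case the installed uapi headers predate the flags.
+ */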
+#define LOCAL_I915_EXEC_BSD1 (I915_EXEC_BSD | (1<<13))
+#define LOCAL_I915_EXEC_BSD2 (I915_EXEC_BSD | (2<<13))
+
+static uint32_t get_engine_base(uint32_t engine)
+{
+ switch (engine) {
+ case LOCAL_I915_EXEC_BSD1: return GEN9_MFX0_MOCS_0;
+ case LOCAL_I915_EXEC_BSD2: return GEN9_MFX1_MOCS_0;
+ case I915_EXEC_RENDER: return GEN9_GFX_MOCS_0;
+ case I915_EXEC_BLT: return GEN9_BLT_MOCS_0;
+ case I915_EXEC_VEBOX: return GEN9_VEBOX_MOCS_0;
+ default: return 0;
+ }
+}
+
+#define MI_STORE_REGISTER_MEM_64_BIT_ADDR ((0x24 << 23) | 2)
+
+static int create_read_batch(struct drm_i915_gem_relocation_entry *reloc,
+ uint32_t *batch,
+ uint32_t dst_handle,
+ uint32_t size,
+ uint32_t reg_base)
+{
+ unsigned int offset = 0;
+
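+ /* Each register read is an MI_STORE_REGISTER_MEM: dword 0 the opcode
+ * (64-bit address form), dword 1 the MMIO offset, dwords 2-3 the
+ * destination address, patched in via the relocation below.
+ */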
+ for (uint32_t index = 0; index < size; index++, offset += 4) {
+ batch[offset] = MI_STORE_REGISTER_MEM_64_BIT_ADDR;
+ batch[offset+1] = reg_base + (index * sizeof(uint32_t));
+ batch[offset+2] = index * sizeof(uint32_t); /* reloc */
+ batch[offset+3] = 0;
+
+ reloc[index].offset = (offset + 2) * sizeof(uint32_t);
+ reloc[index].delta = index * sizeof(uint32_t);
+ reloc[index].target_handle = dst_handle;
+ reloc[index].write_domain = I915_GEM_DOMAIN_RENDER;
+ reloc[index].read_domains = I915_GEM_DOMAIN_RENDER;
+ }
+
+ batch[offset++] = MI_BATCH_BUFFER_END;
+ batch[offset++] = 0;
+
+ return offset * sizeof(uint32_t);
+}
+
+static void do_read_registers(int fd,
+ uint32_t ctx_id,
+ uint32_t dst_handle,
+ uint32_t reg_base,
+ uint32_t size,
+ uint32_t engine_id)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc[size];
+ uint32_t batch[size * 4 + 4];
+ uint32_t handle = gem_create(fd, 4096);
+
+ memset(reloc, 0, sizeof(reloc));
+ memset(obj, 0, sizeof(obj));
+ memset(&execbuf, 0, sizeof(execbuf));
+
+ obj[0].handle = dst_handle;
+
+ obj[1].handle = handle;
+ obj[1].relocation_count = size;
+ obj[1].relocs_ptr = to_user_pointer(reloc);
+
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.batch_len =
+ create_read_batch(reloc, batch, dst_handle, size, reg_base);
+ i915_execbuffer2_set_context_id(execbuf, ctx_id);
+ execbuf.flags = I915_EXEC_SECURE | engine_id;
+
+ gem_write(fd, handle, 0, batch, execbuf.batch_len);
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, handle);
+}
+
+#define LOCAL_MI_LOAD_REGISTER_IMM (0x22 << 23)
+
+static int create_write_batch(uint32_t *batch,
+ const uint32_t *values,
+ uint32_t size,
+ uint32_t reg_base)
+{
+ unsigned int i;
+ unsigned int offset = 0;
+
+ batch[offset++] = LOCAL_MI_LOAD_REGISTER_IMM | (size * 2 - 1);
+
+ for (i = 0; i < size; i++) {
+ batch[offset++] = reg_base + (i * 4);
+ batch[offset++] = values[i];
+ }
+
+ batch[offset++] = MI_BATCH_BUFFER_END;
+
+ return offset * sizeof(uint32_t);
+}
+
+static void write_registers(int fd,
+ uint32_t ctx_id,
+ uint32_t reg_base,
+ const uint32_t *values,
+ uint32_t size,
+ uint32_t engine_id)
+{
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t batch[size * 4 + 2];
+ uint32_t handle = gem_create(fd, 4096);
+
+ memset(&obj, 0, sizeof(obj));
+ memset(&execbuf, 0, sizeof(execbuf));
+
+ obj.handle = handle;
+
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.batch_len = create_write_batch(batch, values, size, reg_base);
+ i915_execbuffer2_set_context_id(execbuf, ctx_id);
+ execbuf.flags = I915_EXEC_SECURE | engine_id;
+
+ gem_write(fd, handle, 0, batch, execbuf.batch_len);
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, handle);
+}
+
+static void check_control_registers(int fd,
+ unsigned engine,
+ uint32_t ctx_id,
+ bool dirty)
+{
+ const uint32_t reg_base = get_engine_base(engine);
+ uint32_t dst_handle = gem_create(fd, 4096);
+ uint32_t *read_regs;
+ struct mocs_table table;
+
+ igt_assert(get_mocs_settings(fd, &table, dirty));
+
+ do_read_registers(fd,
+ ctx_id,
+ dst_handle,
+ reg_base,
+ table.size,
+ engine);
+
+ read_regs = gem_mmap__cpu(fd, dst_handle, 0, 4096, PROT_READ);
+
+ gem_set_domain(fd, dst_handle, I915_GEM_DOMAIN_CPU, 0);
+ for (int index = 0; index < table.size; index++)
+ igt_assert_eq_u32(read_regs[index],
+ table.table[index].control_value);
+
+ munmap(read_regs, 4096);
+ gem_close(fd, dst_handle);
+}
+
+static void check_l3cc_registers(int fd,
+ unsigned engine,
+ uint32_t ctx_id,
+ bool dirty)
+{
+ struct mocs_table table;
+ uint32_t dst_handle = gem_create(fd, 4096);
+ uint32_t *read_regs;
+ int index;
+
+ igt_assert(get_mocs_settings(fd, &table, dirty));
+
+ do_read_registers(fd,
+ ctx_id,
+ dst_handle,
+ GEN9_LNCFCMOCS0,
+ (table.size + 1) / 2,
+ engine);
+
+ read_regs = gem_mmap__cpu(fd, dst_handle, 0, 4096, PROT_READ);
+
+ gem_set_domain(fd, dst_handle, I915_GEM_DOMAIN_CPU, 0);
+
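+ /* Each 32-bit LNCFCMOCS register packs two 16-bit L3CC entries: table
+ * entry 2n in the low half and entry 2n+1 in the high half.
+ */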
+ for (index = 0; index < table.size / 2; index++) {
+ igt_assert_eq_u32(read_regs[index] & 0xffff,
+ table.table[index * 2].l3cc_value);
+ igt_assert_eq_u32(read_regs[index] >> 16,
+ table.table[index * 2 + 1].l3cc_value);
+ }
+
+ if (table.size & 1)
+ igt_assert_eq_u32(read_regs[index] & 0xffff,
+ table.table[index * 2].l3cc_value);
+
+ munmap(read_regs, 4096);
+ gem_close(fd, dst_handle);
+}
+
+static void rc6_wait(int i915)
+{
+ uint64_t start[2], now[2], prev;
+ bool rc6 = false;
+ int fd;
+
+ fd = perf_i915_open(I915_PMU_RC6_RESIDENCY);
+ igt_require(fd != -1);
+
+ /* First wait for roughly an RC6 Evaluation Interval */
+ gem_quiescent_gpu(i915);
+ usleep(320e3);
+
+ /* Then poll for RC6 to start ticking */
+ igt_assert_eq(read(fd, start, sizeof(start)), sizeof(start));
+ prev = start[1];
+ do {
+ usleep(5e3);
+ igt_assert_eq(read(fd, now, sizeof(now)), sizeof(now));
+ if (now[1] - prev > 1e6) {
+ rc6 = true;
+ break;
+ }
+ prev = now[1];
+ } while (now[0] - start[0] < 1e9);
+
+ close(fd);
+
+ igt_debug("rc6 residency %.2fms (delta %.1fms over 5ms), elapsed %.2fms\n",
+ 1e-6 * (now[1] - start[1]),
+ 1e-6 * (now[1] - prev),
+ 1e-6 * (now[0] - start[0]));
+ igt_require(rc6);
+}
+
+static void check_mocs_values(int fd, unsigned engine, uint32_t ctx_id, bool dirty)
+{
+ check_control_registers(fd, engine, ctx_id, dirty);
+
+ if (engine == I915_EXEC_RENDER)
+ check_l3cc_registers(fd, engine, ctx_id, dirty);
+}
+
+static void write_dirty_mocs(int fd, unsigned engine, uint32_t ctx_id)
+{
+ write_registers(fd, ctx_id, get_engine_base(engine),
+ write_values, ARRAY_SIZE(write_values),
+ engine);
+
+ if (engine == I915_EXEC_RENDER)
+ write_registers(fd, ctx_id, GEN9_LNCFCMOCS0,
+ write_values, ARRAY_SIZE(write_values),
+ engine);
+}
+
+static void run_test(int fd, unsigned engine, unsigned flags, unsigned mode)
+{
+ uint32_t ctx_id = 0;
+ uint32_t ctx_clean_id;
+ uint32_t ctx_dirty_id;
+
+ gem_require_ring(fd, engine);
+
+ /* Skip if we don't know where the registers are for this engine */
+ igt_require(get_engine_base(engine));
+
+ if (flags & MOCS_NON_DEFAULT_CTX)
+ ctx_id = gem_context_create(fd);
+
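+ /* Dirty the MOCS registers from a scratch context first; the context
+ * under test below must still read back the expected clean table.
+ */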
+ if (flags & MOCS_DIRTY_VALUES) {
+ ctx_dirty_id = gem_context_create(fd);
+ write_dirty_mocs(fd, engine, ctx_dirty_id);
+ check_mocs_values(fd, engine, ctx_dirty_id, true);
+ }
+
+ check_mocs_values(fd, engine, ctx_id, false);
+
+ switch (mode) {
+ case NONE: break;
+ case RESET: igt_force_gpu_reset(fd); break;
+ case SUSPEND: igt_system_suspend_autoresume(SUSPEND_STATE_MEM,
+ SUSPEND_TEST_NONE); break;
+ case HIBERNATE: igt_system_suspend_autoresume(SUSPEND_STATE_DISK,
+ SUSPEND_TEST_NONE); break;
+ case RC6: rc6_wait(fd); break;
+ }
+
+ check_mocs_values(fd, engine, ctx_id, false);
+
+ if (flags & MOCS_DIRTY_VALUES) {
+ ctx_clean_id = gem_context_create(fd);
+ check_mocs_values(fd, engine, ctx_dirty_id, true);
+ check_mocs_values(fd, engine, ctx_clean_id, false);
+ gem_context_destroy(fd, ctx_dirty_id);
+ gem_context_destroy(fd, ctx_clean_id);
+ }
+
+ if (ctx_id)
+ gem_context_destroy(fd, ctx_id);
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ struct mocs_table table;
+ int fd = -1;
+
+ igt_fixture {
+ fd = drm_open_driver_master(DRIVER_INTEL); /* for SECURE */
+ igt_require_gem(fd);
+ gem_require_mocs_registers(fd);
+ igt_require(get_mocs_settings(fd, &table, false));
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ /* We don't know which engine will be assigned to us if we're
+ * using plain I915_EXEC_BSD, and I915_EXEC_DEFAULT would just
+ * duplicate the render engine.
+ */
+ if ((e->exec_id == I915_EXEC_BSD && !e->flags) ||
+ e->exec_id == I915_EXEC_DEFAULT)
+ continue;
+
+ for (unsigned mode = NONE; mode < MAX_MOCS_TEST_MODES; mode++) {
+ for (unsigned flags = 0; flags < ALL_MOCS_FLAGS + 1; flags++) {
+ /* Testing whether MOCS values dirtied in one context leak into a
+ * different context is bound to fail on non-render engines - only
+ * the render engine does context save/restore of the MOCS
+ * registers. Also limit the non-default-context testing to the
+ * render engine.
+ */
+ if (flags && e->exec_id != I915_EXEC_RENDER)
+ continue;
+
+ igt_subtest_f("mocs-%s%s%s-%s",
+ test_modes[mode],
+ flags & MOCS_NON_DEFAULT_CTX ? "-ctx": "",
+ flags & MOCS_DIRTY_VALUES ? "-dirty" : "",
+ e->name) {
+ if (flags & (MOCS_NON_DEFAULT_CTX | MOCS_DIRTY_VALUES))
+ gem_require_contexts(fd);
+
+ run_test(fd, e->exec_id | e->flags, flags, mode);
+ }
+ }
+ }
+ }
+
+ igt_fixture
+ close(fd);
+}
diff --git a/tests/i915/gem_non_secure_batch.c b/tests/i915/gem_non_secure_batch.c
new file mode 100644
index 00000000..5f339914
--- /dev/null
+++ b/tests/i915/gem_non_secure_batch.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch> (based on gem_storedw_*.c)
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+#include "i830_reg.h"
+
+IGT_TEST_DESCRIPTION("Basic check of non-secure batches.");
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+/*
+ * Testcase: Basic check of non-secure batches
+ *
+ * This test tries to stop the render ring with an MI_LOAD_REGISTER_IMM
+ * command, which should fail if the non-secure handling works correctly.
+ */
+
+static int num_rings = 1;
+
+static void
+mi_lri_loop(void)
+{
+ int i;
+
+ srandom(0xdeadbeef);
+
+ for (i = 0; i < 0x100; i++) {
+ int ring = random() % num_rings + 1;
+
+ BEGIN_BATCH(4, 0);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM);
+ OUT_BATCH(0x203c); /* RENDER RING CTL */
+ OUT_BATCH(0); /* try to stop the ring */
+ OUT_BATCH(MI_NOOP);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_flush_on_ring(batch, ring);
+ }
+}
+
+igt_simple_main
+{
+ int fd;
+ int devid;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ devid = intel_get_drm_devid(fd);
+
+ if (HAS_BSD_RING(devid))
+ num_rings++;
+
+ if (HAS_BLT_RING(devid))
+ num_rings++;
+
+
+ igt_info("num rings detected: %i\n", num_rings);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ igt_assert(bufmgr);
+ drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+ igt_assert(batch);
+
+ mi_lri_loop();
+ gem_quiescent_gpu(fd);
+
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+}
diff --git a/tests/i915/gem_partial_pwrite_pread.c b/tests/i915/gem_partial_pwrite_pread.c
new file mode 100644
index 00000000..35e39ad7
--- /dev/null
+++ b/tests/i915/gem_partial_pwrite_pread.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+
+IGT_TEST_DESCRIPTION("Test pwrite/pread consistency when touching partial"
+ " cachelines.");
+
+/*
+ * Testcase: pwrite/pread consistency when touching partial cachelines
+ *
+ * Some fancy new pwrite/pread optimizations clflush in-line while
+ * reading/writing. Check whether all required clflushes happen.
+ *
+ */
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+drm_intel_bo *scratch_bo;
+drm_intel_bo *staging_bo;
+#define BO_SIZE (4*4096)
+uint32_t devid;
+int fd;
+
+static void
+copy_bo(drm_intel_bo *src, drm_intel_bo *dst)
+{
+ BLIT_COPY_BATCH_START(0);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ 4096);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH((BO_SIZE/4096) << 16 | 1024);
+ OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH(4096);
+ OUT_RELOC_FENCED(src, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_flush(batch);
+}
+
+static void
+blt_bo_fill(drm_intel_bo *tmp_bo, drm_intel_bo *bo, uint8_t val)
+{
+ uint8_t *gtt_ptr;
+ int i;
+
+ do_or_die(drm_intel_gem_bo_map_gtt(tmp_bo));
+ gtt_ptr = tmp_bo->virtual;
+
+ for (i = 0; i < BO_SIZE; i++)
+ gtt_ptr[i] = val;
+
+ drm_intel_gem_bo_unmap_gtt(tmp_bo);
+
+ igt_drop_caches_set(fd, DROP_BOUND);
+
+ copy_bo(tmp_bo, bo);
+}
+
+#define MAX_BLT_SIZE 128
+#define ROUNDS 1000
+uint8_t tmp[BO_SIZE];
+
+static void get_range(int *start, int *len)
+{
+ *start = random() % (BO_SIZE - 1);
+ *len = random() % (BO_SIZE - *start - 1) + 1;
+}
+
+static void test_partial_reads(void)
+{
+ int i, j;
+
+ igt_info("checking partial reads\n");
+ for (i = 0; i < ROUNDS; i++) {
+ uint8_t val = i;
+ int start, len;
+
+ blt_bo_fill(staging_bo, scratch_bo, val);
+
+ get_range(&start, &len);
+ do_or_die(drm_intel_bo_get_subdata(scratch_bo, start, len, tmp));
+ for (j = 0; j < len; j++) {
+ igt_assert_f(tmp[j] == val,
+ "mismatch at %i [%i + %i], got: %i, expected: %i\n",
+ j, start, len, tmp[j], val);
+ }
+
+ igt_progress("partial reads test: ", i, ROUNDS);
+ }
+}
+
+static void test_partial_writes(void)
+{
+ int i, j;
+ uint8_t *gtt_ptr;
+
+ igt_info("checking partial writes\n");
+ for (i = 0; i < ROUNDS; i++) {
+ uint8_t val = i;
+ int start, len;
+
+ blt_bo_fill(staging_bo, scratch_bo, val);
+
+ memset(tmp, i + 63, BO_SIZE);
+
+ get_range(&start, &len);
+ drm_intel_bo_subdata(scratch_bo, start, len, tmp);
+
+ copy_bo(scratch_bo, staging_bo);
+ drm_intel_gem_bo_map_gtt(staging_bo);
+ gtt_ptr = staging_bo->virtual;
+
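+ /* Bytes before the pwritten range must still hold the blit fill value,
+ * the range itself the pwrite payload, and the tail the fill again.
+ */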
+ for (j = 0; j < start; j++) {
+ igt_assert_f(gtt_ptr[j] == val,
+ "mismatch at %i (start=%i), got: %i, expected: %i\n",
+ j, start, gtt_ptr[j], val);
+ }
+ for (; j < start + len; j++) {
+ igt_assert_f(gtt_ptr[j] == tmp[0],
+ "mismatch at %i (%i/%i), got: %i, expected: %i\n",
+ j, j-start, len, gtt_ptr[j], tmp[0]);
+ }
+ for (; j < BO_SIZE; j++) {
+ igt_assert_f(gtt_ptr[j] == val,
+ "mismatch at %i (end=%i), got: %i, expected: %i\n",
+ j, start+len, gtt_ptr[j], val);
+ }
+ drm_intel_gem_bo_unmap_gtt(staging_bo);
+
+ igt_progress("partial writes test: ", i, ROUNDS);
+ }
+}
+
+static void test_partial_read_writes(void)
+{
+ int i, j;
+ uint8_t *gtt_ptr;
+
+ igt_info("checking partial writes after partial reads\n");
+ for (i = 0; i < ROUNDS; i++) {
+ uint8_t val = i;
+ int start, len;
+
+ blt_bo_fill(staging_bo, scratch_bo, val);
+
+ /* partial read */
+ get_range(&start, &len);
+ drm_intel_bo_get_subdata(scratch_bo, start, len, tmp);
+ for (j = 0; j < len; j++) {
+ igt_assert_f(tmp[j] == val,
+ "mismatch in read at %i [%i + %i], got: %i, expected: %i\n",
+ j, start, len, tmp[j], val);
+ }
+
+ /* Change contents through gtt to make the pread cachelines
+ * stale. */
+ val += 17;
+ blt_bo_fill(staging_bo, scratch_bo, val);
+
+ /* partial write */
+ memset(tmp, i + 63, BO_SIZE);
+
+ get_range(&start, &len);
+ drm_intel_bo_subdata(scratch_bo, start, len, tmp);
+
+ copy_bo(scratch_bo, staging_bo);
+ do_or_die(drm_intel_gem_bo_map_gtt(staging_bo));
+ gtt_ptr = staging_bo->virtual;
+
+ for (j = 0; j < start; j++) {
+ igt_assert_f(gtt_ptr[j] == val,
+ "mismatch at %i (start=%i), got: %i, expected: %i\n",
+ j, start, gtt_ptr[j], val);
+ }
+ for (; j < start + len; j++) {
+ igt_assert_f(gtt_ptr[j] == tmp[0],
+ "mismatch at %i (%i/%i), got: %i, expected: %i\n",
+ j, j - start, len, gtt_ptr[j], tmp[0]);
+ }
+ for (; j < BO_SIZE; j++) {
+ igt_assert_f(gtt_ptr[j] == val,
+ "mismatch at %i (end=%i), got: %i, expected: %i\n",
+ j, start + len, gtt_ptr[j], val);
+ }
+ drm_intel_gem_bo_unmap_gtt(staging_bo);
+
+ igt_progress("partial read/writes test: ", i, ROUNDS);
+ }
+}
+
+static void do_tests(int cache_level, const char *suffix)
+{
+ igt_fixture {
+ if (cache_level != -1)
+ gem_set_caching(fd, scratch_bo->handle, cache_level);
+ }
+
+ igt_subtest_f("reads%s", suffix)
+ test_partial_reads();
+
+ igt_subtest_f("write%s", suffix)
+ test_partial_writes();
+
+ igt_subtest_f("writes-after-reads%s", suffix)
+ test_partial_read_writes();
+}
+
+igt_main
+{
+ srandom(0xdeadbeef);
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ devid = intel_get_drm_devid(fd);
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+ /* overallocate the buffers we're actually using because */
+ scratch_bo = drm_intel_bo_alloc(bufmgr, "scratch bo", BO_SIZE, 4096);
+ staging_bo = drm_intel_bo_alloc(bufmgr, "staging bo", BO_SIZE, 4096);
+ }
+
+ do_tests(-1, "");
+
+ /* Repeat the tests using different levels of snooping */
+ do_tests(0, "-uncached");
+ do_tests(1, "-snoop");
+ do_tests(2, "-display");
+
+ igt_fixture {
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_persistent_relocs.c b/tests/i915/gem_persistent_relocs.c
new file mode 100644
index 00000000..452fe686
--- /dev/null
+++ b/tests/i915/gem_persistent_relocs.c
@@ -0,0 +1,360 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <signal.h>
+#include <sys/wait.h>
+
+#include <drm.h>
+
+
+IGT_TEST_DESCRIPTION("Test persistent relocations as used by uxa/libva.");
+
+/*
+ * Testcase: Persistent relocations as used by uxa/libva
+ *
+ * Note: this currently fails on byt/full-ppgtt
+ * https://bugs.freedesktop.org/show_bug.cgi?id=84859
+ */
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+uint32_t blob[2048*2048];
+#define NUM_TARGET_BOS 16
+drm_intel_bo *pc_target_bo[NUM_TARGET_BOS];
+drm_intel_bo *dummy_bo;
+drm_intel_bo *special_bos[NUM_TARGET_BOS];
+uint32_t relocs_bo_handle[NUM_TARGET_BOS];
+void *gtt_relocs_ptr[NUM_TARGET_BOS];
+uint32_t devid;
+int special_reloc_ofs;
+int special_line_ofs;
+int special_batch_len;
+
+int small_pitch = 64;
+
+static drm_intel_bo *create_special_bo(void)
+{
+ drm_intel_bo *bo;
+ uint32_t data[1024];
+ int len = 0;
+#define BATCH(dw) data[len++] = (dw);
+
+ memset(data, 0, 4096);
+ bo = drm_intel_bo_alloc(bufmgr, "special batch", 4096, 4096);
+
+ if (intel_gen(devid) >= 8) {
+ BATCH(MI_NOOP);
+ BATCH(XY_COLOR_BLT_CMD_NOLEN | 5 |
+ COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB);
+ } else {
+ BATCH(XY_COLOR_BLT_CMD_NOLEN | 4 |
+ COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB);
+ }
+
+ BATCH((3 << 24) | (0xf0 << 16) | small_pitch);
+ special_line_ofs = 4*len;
+ BATCH(0);
+ BATCH(1 << 16 | 1);
+ special_reloc_ofs = 4*len;
+ BATCH(0);
+ if (intel_gen(devid) >= 8)
+ BATCH(0); /* FIXME */
+ BATCH(0xdeadbeef);
+
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+ /* batchbuffer end */
+ if (IS_GEN5(batch->devid)) {
+ BATCH(CMD_POLY_STIPPLE_OFFSET << 16);
+ BATCH(0);
+ }
+ igt_assert_eq(len % 2, 0);
+ BATCH(MI_NOOP);
+ BATCH(MI_BATCH_BUFFER_END);
+
+ drm_intel_bo_subdata(bo, 0, 4096, data);
+ special_batch_len = len*4;
+
+ return bo;
+}
+
+static void emit_dummy_load(int pitch)
+{
+ int i;
+ uint32_t tile_flags = 0;
+
+ if (IS_965(devid)) {
+ pitch /= 4;
+ tile_flags = XY_SRC_COPY_BLT_SRC_TILED |
+ XY_SRC_COPY_BLT_DST_TILED;
+ }
+
+ for (i = 0; i < 5; i++) {
+ BLIT_COPY_BATCH_START(tile_flags);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ pitch);
+ OUT_BATCH(0 << 16 | 1024);
+ OUT_BATCH((2048) << 16 | (2048));
+ OUT_RELOC_FENCED(dummy_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH(pitch);
+ OUT_RELOC_FENCED(dummy_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+
+ if (batch->gen >= 6) {
+ BEGIN_BATCH(3, 0);
+ OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
+ }
+ intel_batchbuffer_flush(batch);
+}
+
+static void faulting_reloc_and_emit(int fd, drm_intel_bo *target_bo,
+ void *gtt_relocs, drm_intel_bo *special_bo)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 exec[2];
+ int ring;
+
+ if (intel_gen(devid) >= 6)
+ ring = I915_EXEC_BLT;
+ else
+ ring = 0;
+
+ exec[0].handle = target_bo->handle;
+ exec[0].relocation_count = 0;
+ exec[0].relocs_ptr = 0;
+ exec[0].alignment = 0;
+ exec[0].offset = 0;
+ exec[0].flags = 0;
+ exec[0].rsvd1 = 0;
+ exec[0].rsvd2 = 0;
+
+ exec[1].handle = special_bo->handle;
+ exec[1].relocation_count = 1;
+ /* A newly mmapped GTT bo will fault on first access. */
+ exec[1].relocs_ptr = to_user_pointer(gtt_relocs);
+ exec[1].alignment = 0;
+ exec[1].offset = 0;
+ exec[1].flags = 0;
+ exec[1].rsvd1 = 0;
+ exec[1].rsvd2 = 0;
+
+ execbuf.buffers_ptr = to_user_pointer(exec);
+ execbuf.buffer_count = 2;
+ execbuf.batch_start_offset = 0;
+ execbuf.batch_len = special_batch_len;
+ execbuf.cliprects_ptr = 0;
+ execbuf.num_cliprects = 0;
+ execbuf.DR1 = 0;
+ execbuf.DR4 = 0;
+ execbuf.flags = ring;
+ i915_execbuffer2_set_context_id(execbuf, 0);
+ execbuf.rsvd2 = 0;
+
+ gem_execbuf(fd, &execbuf);
+}
+
+static void do_test(int fd, bool faulting_reloc)
+{
+ uint32_t tiling_mode = I915_TILING_X;
+ unsigned long pitch, act_size;
+ uint32_t test;
+ int i, repeat;
+
+ if (faulting_reloc)
+ igt_disable_prefault();
+
+ act_size = 2048;
+ dummy_bo = drm_intel_bo_alloc_tiled(bufmgr, "tiled dummy_bo", act_size, act_size,
+ 4, &tiling_mode, &pitch, 0);
+
+ drm_intel_bo_subdata(dummy_bo, 0, act_size*act_size*4, blob);
+
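+ /* For each target, stash its relocation entry in a fresh GTT-mapped bo
+ * so that execbuf has to fault the relocation list itself when reading it.
+ */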
+ for (i = 0; i < NUM_TARGET_BOS; i++) {
+ struct drm_i915_gem_relocation_entry reloc[1];
+
+ special_bos[i] = create_special_bo();
+ pc_target_bo[i] = drm_intel_bo_alloc(bufmgr, "special batch", 4096, 4096);
+ igt_assert(pc_target_bo[i]->offset == 0);
+
+ reloc[0].offset = special_reloc_ofs;
+ reloc[0].delta = 0;
+ reloc[0].target_handle = pc_target_bo[i]->handle;
+ reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+ reloc[0].presumed_offset = 0;
+
+ relocs_bo_handle[i] = gem_create(fd, 4096);
+ gem_write(fd, relocs_bo_handle[i], 0, reloc, sizeof(reloc));
+ gtt_relocs_ptr[i] = gem_mmap__gtt(fd, relocs_bo_handle[i], 4096,
+ PROT_READ | PROT_WRITE);
+ }
+
+ /* repeat must be smaller than 4096/small_pitch */
+ for (repeat = 0; repeat < 8; repeat++) {
+ for (i = 0; i < NUM_TARGET_BOS; i++) {
+ uint32_t data[2] = {
+ (repeat << 16) | 0,
+ ((repeat + 1) << 16) | 1
+ };
+
+ drm_intel_bo_subdata(special_bos[i], special_line_ofs, 8, &data);
+
+ emit_dummy_load(pitch);
+ faulting_reloc_and_emit(fd, pc_target_bo[i],
+ gtt_relocs_ptr[i],
+ special_bos[i]);
+ }
+ }
+
+ /* Only check at the end to avoid unnecessarily synchronous behaviour. */
+ for (i = 0; i < NUM_TARGET_BOS; i++) {
+ /* repeat must be smaller than 4096/small_pitch */
+ for (repeat = 0; repeat < 8; repeat++) {
+ drm_intel_bo_get_subdata(pc_target_bo[i],
+ repeat*small_pitch, 4, &test);
+ igt_assert_f(test == 0xdeadbeef,
+ "mismatch in buffer %i: 0x%08x instead of 0xdeadbeef at offset %i\n",
+ i, test, repeat*small_pitch);
+ }
+ drm_intel_bo_unreference(pc_target_bo[i]);
+ drm_intel_bo_unreference(special_bos[i]);
+ gem_close(fd, relocs_bo_handle[i]);
+ munmap(gtt_relocs_ptr[i], 4096);
+ }
+
+ drm_intel_gem_bo_map_gtt(dummy_bo);
+ drm_intel_gem_bo_unmap_gtt(dummy_bo);
+
+ drm_intel_bo_unreference(dummy_bo);
+
+ if (faulting_reloc)
+ igt_enable_prefault();
+}
+
+#define INTERRUPT (1 << 0)
+#define FAULTING (1 << 1)
+#define THRASH (1 << 2)
+#define THRASH_INACTIVE (1 << 3)
+#define ALL_FLAGS (INTERRUPT | FAULTING | THRASH | THRASH_INACTIVE)
+static void do_forked_test(int fd, unsigned flags)
+{
+ int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
+ struct igt_helper_process thrasher = {};
+
+ if (flags & (THRASH | THRASH_INACTIVE)) {
+ uint64_t val = (flags & THRASH_INACTIVE) ?
+ (DROP_RETIRE | DROP_BOUND | DROP_UNBOUND) : DROP_ALL;
+
+ igt_fork_helper(&thrasher) {
+ while (1) {
+ usleep(1000);
+ igt_drop_caches_set(fd, val);
+ }
+ }
+ }
+
+ igt_fork(i, num_threads) {
+ /* re-create process local data */
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+ if (flags & INTERRUPT)
+ igt_fork_signal_helper();
+
+ do_test(fd, flags & FAULTING);
+
+ if (flags & INTERRUPT)
+ igt_stop_signal_helper();
+ }
+
+ igt_waitchildren();
+ if (flags & (THRASH | THRASH_INACTIVE))
+ igt_stop_helper(&thrasher);
+}
+
+int fd;
+
+#define MAX_BLT_SIZE 128
+igt_main
+{
+ igt_skip_on_simulation();
+
+ memset(blob, 'A', sizeof(blob));
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ /* disable reuse, otherwise the test fails */
+ //drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ devid = intel_get_drm_devid(fd);
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+ }
+
+ igt_subtest("normal")
+ do_test(fd, false);
+
+ igt_fork_signal_helper();
+ igt_subtest("interruptible")
+ do_test(fd, false);
+ igt_stop_signal_helper();
+
+ for (unsigned flags = 0; flags <= ALL_FLAGS; flags++) {
+ if ((flags & THRASH) && (flags & THRASH_INACTIVE))
+ continue;
+
+ igt_subtest_f("forked%s%s%s%s",
+ flags & INTERRUPT ? "-interruptible" : "",
+ flags & FAULTING ? "-faulting-reloc" : "",
+ flags & THRASH ? "-thrashing" : "",
+ flags & THRASH_INACTIVE ? "-thrash-inactive" : "")
+ do_forked_test(fd, flags);
+ }
+
+ igt_fixture {
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_pipe_control_store_loop.c b/tests/i915/gem_pipe_control_store_loop.c
new file mode 100644
index 00000000..db23e33d
--- /dev/null
+++ b/tests/i915/gem_pipe_control_store_loop.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch> (based on gem_storedw_*.c)
+ *
+ */
+
+/*
+ * Testcase: (TLB-)Coherency of pipe_control QW writes
+ *
+ * Writes a counter-value into an always newly allocated target bo (by disabling
+ * buffer reuse). Decently thrashes on TLB inconsistencies, too.
+ */
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("Test (TLB-)Coherency of pipe_control QW writes.");
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+uint32_t devid;
+
+#define GFX_OP_PIPE_CONTROL ((0x3<<29)|(0x3<<27)|(0x2<<24)|2)
+#define PIPE_CONTROL_WRITE_IMMEDIATE (1<<14)
+#define PIPE_CONTROL_WRITE_TIMESTAMP (3<<14)
+#define PIPE_CONTROL_DEPTH_STALL (1<<13)
+#define PIPE_CONTROL_WC_FLUSH (1<<12)
+#define PIPE_CONTROL_IS_FLUSH (1<<11) /* MBZ on Ironlake */
+#define PIPE_CONTROL_TC_FLUSH (1<<10) /* GM45+ only */
+#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1)
+#define PIPE_CONTROL_CS_STALL (1<<20)
+#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
+
+/* Like the store dword test, but we create new command buffers each time */
+static void
+store_pipe_control_loop(bool preuse_buffer)
+{
+ int i, val = 0;
+ uint32_t *buf;
+ drm_intel_bo *target_bo;
+
+ for (i = 0; i < SLOW_QUICK(0x10000, 4); i++) {
+ /* we want to check tlb consistency of the pipe_control target,
+ * so get a new buffer every time around */
+ target_bo = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
+ igt_assert(target_bo);
+
+ if (preuse_buffer) {
+ COLOR_BLIT_COPY_BATCH_START(0);
+ OUT_BATCH((3 << 24) | (0xf0 << 16) | 64);
+ OUT_BATCH(0);
+ OUT_BATCH(1 << 16 | 1);
+
+ /*
+ * IMPORTANT: We need to preuse the buffer in a
+ * different domain than what the pipe control write
+ * (and kernel wa) uses!
+ */
+ OUT_RELOC_FENCED(target_bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0);
+ OUT_BATCH(0xdeadbeef);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_flush(batch);
+ }
+
+ /* gem_storedw_batches_loop.c is a bit overenthusiastic with
+ * creating new batchbuffers - with buffer reuse disabled, the
+ * support code will do that for us. */
+ if (batch->gen >= 8) {
+ BEGIN_BATCH(4, 1);
+ OUT_BATCH(GFX_OP_PIPE_CONTROL + 1);
+ OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
+ OUT_RELOC_FENCED(target_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ PIPE_CONTROL_GLOBAL_GTT);
+ OUT_BATCH(val); /* write data */
+ ADVANCE_BATCH();
+
+ } else if (batch->gen >= 6) {
+ /* work-around hw issue, see intel_emit_post_sync_nonzero_flush
+ * in mesa sources. */
+ BEGIN_BATCH(4, 1);
+ OUT_BATCH(GFX_OP_PIPE_CONTROL);
+ OUT_BATCH(PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD);
+ OUT_BATCH(0); /* address */
+ OUT_BATCH(0); /* write data */
+ ADVANCE_BATCH();
+
+ BEGIN_BATCH(4, 1);
+ OUT_BATCH(GFX_OP_PIPE_CONTROL);
+ OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
+ OUT_RELOC(target_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ PIPE_CONTROL_GLOBAL_GTT);
+ OUT_BATCH(val); /* write data */
+ ADVANCE_BATCH();
+ } else if (batch->gen >= 4) {
+ BEGIN_BATCH(4, 1);
+ OUT_BATCH(GFX_OP_PIPE_CONTROL | PIPE_CONTROL_WC_FLUSH |
+ PIPE_CONTROL_TC_FLUSH |
+ PIPE_CONTROL_WRITE_IMMEDIATE | 2);
+ OUT_RELOC(target_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ PIPE_CONTROL_GLOBAL_GTT);
+ OUT_BATCH(val);
+ OUT_BATCH(0xdeadbeef);
+ ADVANCE_BATCH();
+ }
+
+ intel_batchbuffer_flush_on_ring(batch, 0);
+
+ drm_intel_bo_map(target_bo, 1);
+
+ buf = target_bo->virtual;
+ igt_assert(buf[0] == val);
+
+ drm_intel_bo_unmap(target_bo);
+ /* Make doubly sure that this buffer won't get reused. */
+ drm_intel_bo_disable_reuse(target_bo);
+ drm_intel_bo_unreference(target_bo);
+
+ val++;
+ }
+}
+
+int fd;
+
+igt_main
+{
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ devid = intel_get_drm_devid(fd);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ igt_assert(bufmgr);
+
+ igt_skip_on(IS_GEN2(devid) || IS_GEN3(devid));
+ igt_skip_on(devid == PCI_CHIP_I965_G); /* has totally broken pipe control */
+
+ /* IMPORTANT: No call to
+ * drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ * here because we want to have fresh buffers (to thrash the TLB)
+ * every time! */
+
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+ igt_assert(batch);
+ }
+
+ igt_subtest("fresh-buffer")
+ store_pipe_control_loop(false);
+
+ igt_subtest("reused-buffer")
+ store_pipe_control_loop(true);
+
+ igt_fixture {
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_ppgtt.c b/tests/i915/gem_ppgtt.c
new file mode 100644
index 00000000..af5e3e07
--- /dev/null
+++ b/tests/i915/gem_ppgtt.c
@@ -0,0 +1,339 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "intel_bufmgr.h"
+#include "igt_debugfs.h"
+
+#define WIDTH 512
+#define STRIDE (WIDTH*4)
+#define HEIGHT 512
+#define SIZE (HEIGHT*STRIDE)
+
+static drm_intel_bo *create_bo(drm_intel_bufmgr *bufmgr,
+ uint32_t pixel)
+{
+ drm_intel_bo *bo;
+ uint32_t *v;
+
+ bo = drm_intel_bo_alloc(bufmgr, "surface", SIZE, 4096);
+ igt_assert(bo);
+
+ do_or_die(drm_intel_bo_map(bo, 1));
+ v = bo->virtual;
+ for (int i = 0; i < SIZE/4; i++)
+ v[i] = pixel;
+ drm_intel_bo_unmap(bo);
+
+ return bo;
+}
+
+static void scratch_buf_init(struct igt_buf *buf,
+ drm_intel_bufmgr *bufmgr,
+ uint32_t pixel)
+{
+ memset(buf, 0, sizeof(*buf));
+
+ buf->bo = create_bo(bufmgr, pixel);
+ buf->stride = STRIDE;
+ buf->tiling = I915_TILING_NONE;
+ buf->size = SIZE;
+}
+
+static void scratch_buf_fini(struct igt_buf *buf)
+{
+ drm_intel_bo_unreference(buf->bo);
+ memset(buf, 0, sizeof(*buf));
+}
+
+static void fork_rcs_copy(int target, drm_intel_bo **dst, int count, unsigned flags)
+#define CREATE_CONTEXT 0x1
+{
+ igt_render_copyfunc_t render_copy;
+ int devid;
+
+ for (int child = 0; child < count; child++) {
+ int fd = drm_open_driver(DRIVER_INTEL);
+ drm_intel_bufmgr *bufmgr;
+
+ devid = intel_get_drm_devid(fd);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ igt_assert(bufmgr);
+
+ dst[child] = create_bo(bufmgr, ~0);
+
+ if (flags & CREATE_CONTEXT) {
+ drm_intel_context *ctx;
+
+ ctx = drm_intel_gem_context_create(dst[child]->bufmgr);
+ igt_require(ctx);
+ }
+
+ render_copy = igt_get_render_copyfunc(devid);
+ igt_require_f(render_copy,
+ "no render-copy function\n");
+ }
+
+ igt_fork(child, count) {
+ struct intel_batchbuffer *batch;
+ struct igt_buf buf = {};
+
+ batch = intel_batchbuffer_alloc(dst[child]->bufmgr,
+ devid);
+ igt_assert(batch);
+
+ if (flags & CREATE_CONTEXT) {
+ drm_intel_context *ctx;
+
+ ctx = drm_intel_gem_context_create(dst[child]->bufmgr);
+ intel_batchbuffer_set_context(batch, ctx);
+ }
+
+ buf.bo = dst[child];
+ buf.stride = STRIDE;
+ buf.tiling = I915_TILING_NONE;
+ buf.size = SIZE;
+
+ for (int i = 0; i <= target; i++) {
+ struct igt_buf src;
+
+ scratch_buf_init(&src, dst[child]->bufmgr,
+ i | child << 16);
+
+ render_copy(batch, NULL,
+ &src, 0, 0,
+ WIDTH, HEIGHT,
+ &buf, 0, 0);
+
+ scratch_buf_fini(&src);
+ }
+ }
+}
+
+static void fork_bcs_copy(int target, drm_intel_bo **dst, int count)
+{
+ int devid;
+
+ for (int child = 0; child < count; child++) {
+ drm_intel_bufmgr *bufmgr;
+ int fd = drm_open_driver(DRIVER_INTEL);
+
+ devid = intel_get_drm_devid(fd);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ igt_assert(bufmgr);
+
+ dst[child] = create_bo(bufmgr, ~0);
+ }
+
+ igt_fork(child, count) {
+ struct intel_batchbuffer *batch;
+
+ batch = intel_batchbuffer_alloc(dst[child]->bufmgr,
+ devid);
+ igt_assert(batch);
+
+ for (int i = 0; i <= target; i++) {
+ drm_intel_bo *src[2];
+
+ src[0] = create_bo(dst[child]->bufmgr,
+ ~0);
+ src[1] = create_bo(dst[child]->bufmgr,
+ i | child << 16);
+
+ intel_copy_bo(batch, src[0], src[1], SIZE);
+ intel_copy_bo(batch, dst[child], src[0], SIZE);
+
+ drm_intel_bo_unreference(src[1]);
+ drm_intel_bo_unreference(src[0]);
+ }
+ }
+}
+
+static void surfaces_check(drm_intel_bo **bo, int count, uint32_t expected)
+{
+ for (int child = 0; child < count; child++) {
+ uint32_t *ptr;
+
+ do_or_die(drm_intel_bo_map(bo[child], 0));
+ ptr = bo[child]->virtual;
+ for (int j = 0; j < SIZE/4; j++)
+ igt_assert_eq(ptr[j], expected | child << 16);
+ drm_intel_bo_unmap(bo[child]);
+ }
+}
+
+static uint64_t exec_and_get_offset(int fd, uint32_t batch)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 exec[1];
+ uint32_t batch_data[2] = { MI_BATCH_BUFFER_END };
+
+ gem_write(fd, batch, 0, batch_data, sizeof(batch_data));
+
+ memset(exec, 0, sizeof(exec));
+ exec[0].handle = batch;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(exec);
+ execbuf.buffer_count = 1;
+
+ gem_execbuf(fd, &execbuf);
+ igt_assert_neq(exec[0].offset, -1);
+
+ return exec[0].offset;
+}
+
+static void flink_and_close(void)
+{
+ uint32_t fd, fd2;
+ uint32_t bo, flinked_bo, new_bo, name;
+ uint64_t offset, offset_new;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require(gem_uses_full_ppgtt(fd));
+
+ bo = gem_create(fd, 4096);
+ name = gem_flink(fd, bo);
+
+ fd2 = drm_open_driver(DRIVER_INTEL);
+
+ flinked_bo = gem_open(fd2, name);
+ offset = exec_and_get_offset(fd2, flinked_bo);
+ gem_sync(fd2, flinked_bo);
+ gem_close(fd2, flinked_bo);
+
+ igt_drop_caches_set(fd, DROP_RETIRE | DROP_IDLE);
+
+ /* the flinked bo VMA should have been cleared now, so a new bo of the
+ * same size should get the same offset
+ */
+ new_bo = gem_create(fd2, 4096);
+ offset_new = exec_and_get_offset(fd2, new_bo);
+ gem_close(fd2, new_bo);
+
+ igt_assert_eq(offset, offset_new);
+
+ gem_close(fd, bo);
+ close(fd);
+ close(fd2);
+}
+
+static void flink_and_exit(void)
+{
+ uint32_t fd, fd2, fd3;
+ uint32_t bo, flinked_bo, name;
+ char match[20];
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require(gem_uses_full_ppgtt(fd));
+
+ bo = gem_create(fd, 4096);
+ name = gem_flink(fd, bo);
+ snprintf(match, sizeof(match), "(name: %u)", name);
+
+ fd2 = drm_open_driver(DRIVER_INTEL);
+ flinked_bo = gem_open(fd2, name);
+
+ /* Verify VMA is not there yet. */
+ igt_assert(!igt_debugfs_search(fd, "i915_gem_gtt", match));
+
+ exec_and_get_offset(fd2, flinked_bo);
+
+ /* Verify VMA has been created. */
+ igt_assert(igt_debugfs_search(fd, "i915_gem_gtt", match));
+
+ /* Close the fd, destroying its context along with it. */
+ close(fd2);
+
+ /* Execute a different and unrelated (wrt object sharing) context to
+ * ensure engine drops its last context reference.
+ */
+ fd3 = drm_open_driver(DRIVER_INTEL);
+ exec_and_get_offset(fd3, gem_create(fd3, 4096));
+ close(fd3);
+
+ igt_drop_caches_set(fd, DROP_ACTIVE | DROP_RETIRE | DROP_IDLE);
+ igt_assert(!igt_debugfs_search(fd, "i915_gem_gtt", match));
+
+ close(fd);
+}
+
+#define N_CHILD 8
+int main(int argc, char **argv)
+{
+ igt_subtest_init(argc, argv);
+
+ igt_fixture {
+ int fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ close(fd);
+ }
+
+ igt_subtest("blt-vs-render-ctx0") {
+ drm_intel_bo *bcs[1], *rcs[N_CHILD];
+
+ fork_bcs_copy(0x4000, bcs, 1);
+ fork_rcs_copy(0x8000 / N_CHILD, rcs, N_CHILD, 0);
+
+ igt_waitchildren();
+
+ surfaces_check(bcs, 1, 0x4000);
+ surfaces_check(rcs, N_CHILD, 0x8000 / N_CHILD);
+ }
+
+ igt_subtest("blt-vs-render-ctxN") {
+ drm_intel_bo *bcs[1], *rcs[N_CHILD];
+
+ fork_rcs_copy(0x8000 / N_CHILD, rcs, N_CHILD, CREATE_CONTEXT);
+ fork_bcs_copy(0x4000, bcs, 1);
+
+ igt_waitchildren();
+
+ surfaces_check(bcs, 1, 0x4000);
+ surfaces_check(rcs, N_CHILD, 0x8000 / N_CHILD);
+ }
+
+ igt_subtest("flink-and-close-vma-leak")
+ flink_and_close();
+
+ igt_subtest("flink-and-exit-vma-leak")
+ flink_and_exit();
+
+ igt_exit();
+}
diff --git a/tests/i915/gem_pread.c b/tests/i915/gem_pread.c
new file mode 100644
index 00000000..00379580
--- /dev/null
+++ b/tests/i915/gem_pread.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include "drm.h"
+
+#define OBJECT_SIZE 16384
+#define LARGE_OBJECT_SIZE (1024 * 1024)
+#define KGRN "\x1B[32m"
+#define KRED "\x1B[31m"
+#define KNRM "\x1B[0m"
+
+static void do_gem_read(int fd, uint32_t handle, void *buf, int len, int loops)
+{
+ while (loops--)
+ gem_read(fd, handle, 0, buf, len);
+}
+
+static double elapsed(const struct timeval *start,
+ const struct timeval *end,
+ int loop)
+{
+ return (1e6*(end->tv_sec - start->tv_sec) + (end->tv_usec - start->tv_usec))/loop;
+}
+
+static const char *bytes_per_sec(char *buf, double v)
+{
+ const char *order[] = {
+ "",
+ "KiB",
+ "MiB",
+ "GiB",
+ "TiB",
+ NULL,
+ }, **o = order;
+
+ while (v > 1000 && o[1]) {
+ v /= 1000;
+ o++;
+ }
+ sprintf(buf, "%.1f%s/s", v, *o);
+ return buf;
+}
+
+
+uint32_t *src, dst;
+uint32_t *dst_user, src_stolen, large_stolen;
+uint32_t *stolen_pf_user, *stolen_nopf_user;
+int fd, count;
+
+int main(int argc, char **argv)
+{
+ int object_size = 0;
+ double usecs;
+ char buf[100];
+ const char* bps;
+ const struct {
+ int level;
+ const char *name;
+ } cache[] = {
+ { 0, "uncached" },
+ { 1, "snoop" },
+ { 2, "display" },
+ { -1 },
+ }, *c;
+
+ igt_subtest_init(argc, argv);
+
+ if (argc > 1 && atoi(argv[1]))
+ object_size = atoi(argv[1]);
+ if (object_size == 0)
+ object_size = OBJECT_SIZE;
+ object_size = (object_size + 3) & -4;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ dst = gem_create(fd, object_size);
+ src = malloc(object_size);
+ src_stolen = gem_create_stolen(fd, object_size);
+ dst_user = malloc(object_size);
+ }
+
+ igt_subtest("basic") {
+ for (count = 1; count <= 1<<17; count <<= 1) {
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+ do_gem_read(fd, dst, src, object_size, count);
+ gettimeofday(&end, NULL);
+ usecs = elapsed(&start, &end, count);
+ bps = bytes_per_sec(buf, object_size/usecs*1e6);
+ igt_info("Time to pread %d bytes x %6d: %7.3fµs, %s\n",
+ object_size, count, usecs, bps);
+ fflush(stdout);
+ }
+ }
+
+ for (c = cache; c->level != -1; c++) {
+ igt_subtest(c->name) {
+ gem_set_caching(fd, dst, c->level);
+
+ for (count = 1; count <= 1<<17; count <<= 1) {
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+ do_gem_read(fd, dst, src, object_size, count);
+ gettimeofday(&end, NULL);
+ usecs = elapsed(&start, &end, count);
+ bps = bytes_per_sec(buf, object_size/usecs*1e6);
+ igt_info("Time to %s pread %d bytes x %6d: %7.3fµs, %s\n",
+ c->name, object_size, count, usecs, bps);
+ fflush(stdout);
+ }
+ }
+ }
+
+ igt_subtest("stolen-normal") {
+ gem_require_stolen_support(fd);
+ for (count = 1; count <= 1<<17; count <<= 1) {
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+ do_gem_read(fd, src_stolen, dst_user, object_size, count);
+ gettimeofday(&end, NULL);
+ usecs = elapsed(&start, &end, count);
+ bps = bytes_per_sec(buf, object_size/usecs*1e6);
+ igt_info("Time to pread %d bytes x %6d: %7.3fµs, %s\n",
+ object_size, count, usecs, bps);
+ fflush(stdout);
+ }
+ }
+ for (c = cache; c->level != -1; c++) {
+ igt_subtest_f("stolen-%s", c->name) {
+ gem_require_stolen_support(fd);
+ gem_set_caching(fd, src_stolen, c->level);
+
+ for (count = 1; count <= 1<<17; count <<= 1) {
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+ do_gem_read(fd, src_stolen, dst_user,
+ object_size, count);
+ gettimeofday(&end, NULL);
+ usecs = elapsed(&start, &end, count);
+ bps = bytes_per_sec(buf, object_size/usecs*1e6);
+ igt_info("Time to stolen-%s pread %d bytes x %6d: %7.3fµs, %s\n",
+ c->name, object_size, count, usecs, bps);
+ fflush(stdout);
+ }
+ }
+ }
+
+ /* Measure the time taken by the pread operation on stolen objects,
+ * with and without the overhead of page-fault handling when accessing
+ * the user-space buffer.
+ */
+ igt_subtest("pagefault-pread") {
+ gem_require_stolen_support(fd);
+ large_stolen = gem_create_stolen(fd, LARGE_OBJECT_SIZE);
+ stolen_nopf_user = (uint32_t *) mmap(NULL, LARGE_OBJECT_SIZE,
+ PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE,
+ -1, 0);
+ igt_assert(stolen_nopf_user);
+
+ for (count = 1; count <= 10; count ++) {
+ struct timeval start, end;
+ double t_elapsed = 0;
+
+ gettimeofday(&start, NULL);
+ do_gem_read(fd, large_stolen, stolen_nopf_user,
+ LARGE_OBJECT_SIZE, 1);
+ gettimeofday(&end, NULL);
+			t_elapsed = elapsed(&start, &end, 1);
+			bps = bytes_per_sec(buf, LARGE_OBJECT_SIZE/t_elapsed*1e6);
+ igt_info("Pagefault-N - Time to pread %d bytes: %7.3fµs, %s\n",
+ LARGE_OBJECT_SIZE, t_elapsed, bps);
+
+ stolen_pf_user = (uint32_t *) mmap(NULL, LARGE_OBJECT_SIZE,
+ PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE,
+ -1, 0);
+			igt_assert(stolen_pf_user != MAP_FAILED);
+
+ gettimeofday(&start, NULL);
+ do_gem_read(fd, large_stolen, stolen_pf_user,
+ LARGE_OBJECT_SIZE, 1);
+ gettimeofday(&end, NULL);
+			usecs = elapsed(&start, &end, 1);
+			bps = bytes_per_sec(buf, LARGE_OBJECT_SIZE/usecs*1e6);
+ igt_info("Pagefault-Y - Time to pread %d bytes: %7.3fµs, %s%s%s\n",
+ LARGE_OBJECT_SIZE, usecs,
+ t_elapsed < usecs ? KGRN : KRED, bps, KNRM);
+ fflush(stdout);
+ munmap(stolen_pf_user, LARGE_OBJECT_SIZE);
+ }
+ munmap(stolen_nopf_user, LARGE_OBJECT_SIZE);
+ gem_close(fd, large_stolen);
+ }
+
+
+ igt_fixture {
+ free(src);
+ gem_close(fd, dst);
+ free(dst_user);
+ gem_close(fd, src_stolen);
+
+ close(fd);
+ }
+
+ igt_exit();
+}
diff --git a/tests/i915/gem_pread_after_blit.c b/tests/i915/gem_pread_after_blit.c
new file mode 100644
index 00000000..6ef3ca20
--- /dev/null
+++ b/tests/i915/gem_pread_after_blit.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_pread_after_blit.c
+ *
+ * This is a test of pread's behavior when getting values out of just-drawn-to
+ * buffers.
+ *
+ * The goal is to catch failure in the whole-buffer-flush or
+ * ranged-buffer-flush paths in the kernel.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+
+IGT_TEST_DESCRIPTION("Test pread behavior when getting values out of"
+ " just-drawn-to buffers.");
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static const int width = 512, height = 512;
+static const int size = 1024 * 1024;
+
+#define PAGE_SIZE 4096
+
+static drm_intel_bo *
+create_bo(uint32_t val)
+{
+ drm_intel_bo *bo;
+ uint32_t *vaddr;
+ int i;
+
+ bo = drm_intel_bo_alloc(bufmgr, "src bo", size, 4096);
+
+	/* Fill the BO with dwords starting at val */
+ drm_intel_bo_map(bo, 1);
+ vaddr = bo->virtual;
+
+ for (i = 0; i < 1024 * 1024 / 4; i++)
+ vaddr[i] = val++;
+
+ drm_intel_bo_unmap(bo);
+
+ return bo;
+}
+
+static void
+verify_large_read(drm_intel_bo *bo, uint32_t val)
+{
+ uint32_t buf[size / 4];
+ int i;
+
+ drm_intel_bo_get_subdata(bo, 0, size, buf);
+
+ for (i = 0; i < size / 4; i++) {
+ igt_assert_f(buf[i] == val,
+ "Unexpected value 0x%08x instead of "
+ "0x%08x at offset 0x%08x (%p)\n",
+ buf[i], val, i * 4, buf);
+ val++;
+ }
+}
+
+/** This reads at the size that Mesa uses for software fallbacks. */
+static void
+verify_small_read(drm_intel_bo *bo, uint32_t val)
+{
+ uint32_t buf[4096 / 4];
+ int offset, i;
+
+ for (i = 0; i < 4096 / 4; i++)
+ buf[i] = 0x00c0ffee;
+
+ for (offset = 0; offset < size; offset += PAGE_SIZE) {
+ drm_intel_bo_get_subdata(bo, offset, PAGE_SIZE, buf);
+
+ for (i = 0; i < PAGE_SIZE; i += 4) {
+ igt_assert_f(buf[i / 4] == val,
+ "Unexpected value 0x%08x instead of "
+ "0x%08x at offset 0x%08x\n",
+				     buf[i / 4], val, offset + i);
+ val++;
+ }
+ }
+}
+
+typedef igt_hang_t (*do_hang)(int fd);
+
+static igt_hang_t no_hang(int fd)
+{
+ return (igt_hang_t){0};
+}
+
+static igt_hang_t bcs_hang(int fd)
+{
+ return igt_hang_ring(fd, batch->gen >= 6 ? I915_EXEC_BLT : I915_EXEC_DEFAULT);
+}
+
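+/*
+ * Blit known patterns from src[] into tmp[] and immediately pread them
+ * back in various full, partial and overlapping orders, optionally
+ * injecting a blitter hang between the copy and the read, to exercise
+ * the kernel's flush-before-pread paths described above.
+ */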
+static void do_test(int fd, int cache_level,
+ drm_intel_bo *src[2],
+ const uint32_t start[2],
+ drm_intel_bo *tmp[2],
+ int loop, do_hang do_hang_func)
+{
+ igt_hang_t hang;
+
+ if (cache_level != -1) {
+ gem_set_caching(fd, tmp[0]->handle, cache_level);
+ gem_set_caching(fd, tmp[1]->handle, cache_level);
+ }
+
+ do {
+ /* First, do a full-buffer read after blitting */
+ intel_copy_bo(batch, tmp[0], src[0], width*height*4);
+ hang = do_hang_func(fd);
+ verify_large_read(tmp[0], start[0]);
+ igt_post_hang_ring(fd, hang);
+ intel_copy_bo(batch, tmp[0], src[1], width*height*4);
+ hang = do_hang_func(fd);
+ verify_large_read(tmp[0], start[1]);
+ igt_post_hang_ring(fd, hang);
+
+ intel_copy_bo(batch, tmp[0], src[0], width*height*4);
+ hang = do_hang_func(fd);
+ verify_small_read(tmp[0], start[0]);
+ igt_post_hang_ring(fd, hang);
+ intel_copy_bo(batch, tmp[0], src[1], width*height*4);
+ hang = do_hang_func(fd);
+ verify_small_read(tmp[0], start[1]);
+ igt_post_hang_ring(fd, hang);
+
+ intel_copy_bo(batch, tmp[0], src[0], width*height*4);
+ hang = do_hang_func(fd);
+ verify_large_read(tmp[0], start[0]);
+ igt_post_hang_ring(fd, hang);
+
+ intel_copy_bo(batch, tmp[0], src[0], width*height*4);
+ intel_copy_bo(batch, tmp[1], src[1], width*height*4);
+ hang = do_hang_func(fd);
+ verify_large_read(tmp[0], start[0]);
+ verify_large_read(tmp[1], start[1]);
+ igt_post_hang_ring(fd, hang);
+
+ intel_copy_bo(batch, tmp[0], src[0], width*height*4);
+ intel_copy_bo(batch, tmp[1], src[1], width*height*4);
+ hang = do_hang_func(fd);
+ verify_large_read(tmp[1], start[1]);
+ verify_large_read(tmp[0], start[0]);
+ igt_post_hang_ring(fd, hang);
+
+ intel_copy_bo(batch, tmp[1], src[0], width*height*4);
+ intel_copy_bo(batch, tmp[0], src[1], width*height*4);
+ hang = do_hang_func(fd);
+ verify_large_read(tmp[0], start[1]);
+ verify_large_read(tmp[1], start[0]);
+ igt_post_hang_ring(fd, hang);
+ } while (--loop);
+}
+
+drm_intel_bo *src[2], *dst[2];
+int fd;
+
+igt_main
+{
+ const uint32_t start[2] = {0, 1024 * 1024 / 4};
+ const struct {
+ const char *name;
+ int cache;
+ } tests[] = {
+ { "default", -1 },
+ { "uncached", 0 },
+ { "snooped", 1 },
+ { "display", 2 },
+ { NULL, -1 },
+ }, *t;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+ src[0] = create_bo(start[0]);
+ src[1] = create_bo(start[1]);
+
+ dst[0] = drm_intel_bo_alloc(bufmgr, "dst bo", size, 4096);
+ dst[1] = drm_intel_bo_alloc(bufmgr, "dst bo", size, 4096);
+ }
+
+ for (t = tests; t->name; t++) {
+ igt_subtest_f("%s-normal", t->name)
+ do_test(fd, t->cache, src, start, dst, 1, no_hang);
+
+ igt_fork_signal_helper();
+ igt_subtest_f("%s-interruptible", t->name)
+ do_test(fd, t->cache, src, start, dst, 100, no_hang);
+ igt_stop_signal_helper();
+
+ igt_subtest_f("%s-hang", t->name)
+ do_test(fd, t->cache, src, start, dst, 1, bcs_hang);
+ }
+
+ igt_fixture {
+ drm_intel_bo_unreference(src[0]);
+ drm_intel_bo_unreference(src[1]);
+ drm_intel_bo_unreference(dst[0]);
+ drm_intel_bo_unreference(dst[1]);
+
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+ }
+
+ close(fd);
+}
diff --git a/tests/i915/gem_pwrite.c b/tests/i915/gem_pwrite.c
new file mode 100644
index 00000000..696bd316
--- /dev/null
+++ b/tests/i915/gem_pwrite.c
@@ -0,0 +1,358 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include "drm.h"
+
+#define OBJECT_SIZE 16384
+
+#define COPY_BLT_CMD (2<<29|0x53<<22|0x6)
+#define BLT_WRITE_ALPHA (1<<21)
+#define BLT_WRITE_RGB (1<<20)
+#define BLT_SRC_TILED (1<<15)
+#define BLT_DST_TILED (1<<11)
+
+static void do_gem_write(int fd, uint32_t handle, void *buf, int len, int loops)
+{
+ while (loops--)
+ gem_write(fd, handle, 0, buf, len);
+}
+
+static double elapsed(const struct timeval *start,
+ const struct timeval *end,
+ int loop)
+{
+ return (1e6*(end->tv_sec - start->tv_sec) + (end->tv_usec - start->tv_usec))/loop;
+}
+
+static const char *bytes_per_sec(char *buf, double v)
+{
+ const char *order[] = {
+ "",
+ "KiB",
+ "MiB",
+ "GiB",
+ "TiB",
+ NULL,
+ }, **o = order;
+
+ while (v > 1000 && o[1]) {
+ v /= 1000;
+ o++;
+ }
+ sprintf(buf, "%.1f%s/s", v, *o);
+ return buf;
+}
+
+#define FORWARD 0x1
+#define BACKWARD 0x2
+#define RANDOM 0x4
+static void test_big_cpu(int fd, int scale, unsigned flags)
+{
+ uint64_t offset, size;
+ uint32_t handle;
+
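+	/* scale selects the object size to exercise: just past the mappable
+	 * aperture (0), the global GTT (1) or the full GPU aperture (2);
+	 * the extra 4096 bytes guarantee the object cannot fit entirely. */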
+ switch (scale) {
+ case 0:
+ size = gem_mappable_aperture_size() + 4096;
+ break;
+ case 1:
+ size = gem_global_aperture_size(fd) + 4096;
+ break;
+ case 2:
+ size = gem_aperture_size(fd) + 4096;
+ break;
+ }
+ intel_require_memory(1, size, CHECK_RAM);
+
+ handle = gem_create(fd, size);
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+
+ if (flags & FORWARD) {
+ igt_debug("Forwards\n");
+ for (offset = 0; offset < size; offset += 4096) {
+ int suboffset = (offset >> 12) % (4096 - sizeof(offset));
+ uint64_t tmp;
+
+ gem_write(fd, handle, offset + suboffset, &offset, sizeof(offset));
+ gem_read(fd, handle, offset + suboffset, &tmp, sizeof(tmp));
+ igt_assert_eq_u64(offset, tmp);
+ }
+ }
+
+ if (flags & BACKWARD) {
+ igt_debug("Backwards\n");
+ for (offset = size >> 12; offset--; ) {
+ int suboffset = 4096 - (offset % (4096 - sizeof(offset)));
+ uint64_t tmp;
+
+ gem_write(fd, handle, (offset<<12) + suboffset, &offset, sizeof(offset));
+ gem_read(fd, handle, (offset<<12) + suboffset, &tmp, sizeof(tmp));
+ igt_assert_eq_u64(offset, tmp);
+ }
+ }
+
+ if (flags & RANDOM) {
+ igt_debug("Random\n");
+ for (offset = 0; offset < size >> 12; offset++) {
+ uint64_t tmp = rand() % (size >> 12);
+ int suboffset = tmp % (4096 - sizeof(offset));
+
+ gem_write(fd, handle, (tmp << 12) + suboffset, &offset, sizeof(offset));
+ gem_read(fd, handle, (tmp << 12) + suboffset, &tmp, sizeof(tmp));
+ igt_assert_eq_u64(offset, tmp);
+ }
+ }
+
+ gem_close(fd, handle);
+}
+
+static void test_big_gtt(int fd, int scale, unsigned flags)
+{
+ uint64_t offset, size;
+ uint64_t *ptr;
+ uint32_t handle;
+
+ igt_require(gem_mmap__has_wc(fd));
+ switch (scale) {
+ case 0:
+ size = gem_mappable_aperture_size() + 4096;
+ break;
+ case 1:
+ size = gem_global_aperture_size(fd) + 4096;
+ break;
+ case 2:
+ size = gem_aperture_size(fd) + 4096;
+ break;
+ }
+ intel_require_memory(1, size, CHECK_RAM);
+
+ handle = gem_create(fd, size);
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+ ptr = gem_mmap__wc(fd, handle, 0, size, PROT_READ);
+
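+	/* Each pass below writes one 8-byte marker per 4KiB page, at a
+	 * page-dependent sub-offset, then moves the object to the WC domain
+	 * and reads the marker back through the WC mapping. */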
+ if (flags & FORWARD) {
+ igt_debug("Forwards\n");
+ for (offset = 0; offset < size; offset += 4096) {
+ int suboffset = (offset >> 12) % (4096 / sizeof(offset) - 1) * sizeof(offset);
+
+ gem_write(fd, handle, offset + suboffset, &offset, sizeof(offset));
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_WC, 0);
+ igt_assert_eq_u64(ptr[(offset + suboffset)/sizeof(offset)], offset);
+ }
+ }
+
+ if (flags & BACKWARD) {
+ igt_debug("Backwards\n");
+ for (offset = size >> 12; offset--; ) {
+ int suboffset = (4096 - (offset % (4096 - sizeof(offset)))) & -sizeof(offset);
+ gem_write(fd, handle, (offset<<12) + suboffset, &offset, sizeof(offset));
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_WC, 0);
+ igt_assert_eq_u64(ptr[((offset<<12) + suboffset)/sizeof(offset)], offset);
+ }
+ }
+
+ if (flags & RANDOM) {
+ igt_debug("Random\n");
+ for (offset = 0; offset < size >> 12; offset++) {
+ uint64_t tmp = rand() % (size >> 12);
+ int suboffset = (tmp % 4096) & -sizeof(offset);
+
+ tmp = (tmp << 12) + suboffset;
+ gem_write(fd, handle, tmp, &offset, sizeof(offset));
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_WC, 0);
+ igt_assert_eq_u64(ptr[tmp/sizeof(offset)], offset);
+ }
+ }
+
+ munmap(ptr, size);
+ gem_close(fd, handle);
+}
+
+uint32_t *src, dst;
+uint32_t *src_user, dst_stolen;
+int fd;
+
+int main(int argc, char **argv)
+{
+ int object_size = 0;
+ double usecs;
+ const char* bps;
+ char buf[100];
+ int count;
+ const struct {
+ int level;
+ const char *name;
+ } cache[] = {
+ { 0, "uncached" },
+ { 1, "snoop" },
+ { 2, "display" },
+ { -1 },
+ }, *c;
+
+ igt_subtest_init(argc, argv);
+
+ if (argc > 1 && atoi(argv[1]))
+ object_size = atoi(argv[1]);
+ if (object_size == 0)
+ object_size = OBJECT_SIZE;
+ object_size = (object_size + 3) & -4;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ dst = gem_create(fd, object_size);
+ src = malloc(object_size);
+ dst_stolen = gem_create_stolen(fd, object_size);
+ src_user = malloc(object_size);
+ }
+
+ igt_subtest("basic") {
+ for (count = 1; count <= 1<<17; count <<= 1) {
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+ do_gem_write(fd, dst, src, object_size, count);
+ gettimeofday(&end, NULL);
+ usecs = elapsed(&start, &end, count);
+ bps = bytes_per_sec(buf, object_size/usecs*1e6);
+ igt_info("Time to pwrite %d bytes x %6d: %7.3fµs, %s\n",
+ object_size, count, usecs, bps);
+ fflush(stdout);
+ }
+ }
+
+ for (c = cache; c->level != -1; c++) {
+ igt_subtest(c->name) {
+ gem_set_caching(fd, dst, c->level);
+
+ for (count = 1; count <= 1<<17; count <<= 1) {
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+ do_gem_write(fd, dst, src, object_size, count);
+ gettimeofday(&end, NULL);
+ usecs = elapsed(&start, &end, count);
+ bps = bytes_per_sec(buf, object_size/usecs*1e6);
+ igt_info("Time to %s pwrite %d bytes x %6d: %7.3fµs, %s\n",
+ c->name, object_size, count, usecs, bps);
+ fflush(stdout);
+ }
+ }
+ }
+
+ igt_subtest("stolen-normal") {
+ gem_require_stolen_support(fd);
+ for (count = 1; count <= 1<<17; count <<= 1) {
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+ do_gem_write(fd, dst_stolen, src_user,
+ object_size, count);
+ gettimeofday(&end, NULL);
+ usecs = elapsed(&start, &end, count);
+ bps = bytes_per_sec(buf, object_size/usecs*1e6);
+ igt_info("Time to pwrite %d bytes x %6d: %7.3fµs, %s\n",
+ object_size, count, usecs, bps);
+ fflush(stdout);
+ }
+ }
+
+ for (c = cache; c->level != -1; c++) {
+ igt_subtest_f("stolen-%s", c->name) {
+ gem_require_stolen_support(fd);
+			gem_set_caching(fd, dst_stolen, c->level);
+ for (count = 1; count <= 1<<17; count <<= 1) {
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+ do_gem_write(fd, dst_stolen, src_user,
+ object_size, count);
+				gettimeofday(&end, NULL);
+				usecs = elapsed(&start, &end, count);
+				bps = bytes_per_sec(buf,
+						    object_size/usecs*1e6);
+ igt_info("Time to stolen-%s pwrite %d bytes x %6d: %7.3fµs, %s\n",
+ c->name, object_size, count,
+ usecs, bps);
+ fflush(stdout);
+ }
+ }
+ }
+
+ igt_fixture {
+ free(src);
+ gem_close(fd, dst);
+ free(src_user);
+ gem_close(fd, dst_stolen);
+ }
+
+ {
+ const struct mode {
+ const char *name;
+ unsigned flags;
+ } modes[] = {
+ { "forwards", FORWARD },
+ { "backwards", BACKWARD },
+ { "random", RANDOM },
+ { "fbr", FORWARD | BACKWARD | RANDOM },
+ { NULL },
+ }, *m;
+ for (m = modes; m->name; m++) {
+ igt_subtest_f("small-cpu-%s", m->name)
+ test_big_cpu(fd, 0, m->flags);
+ igt_subtest_f("small-gtt-%s", m->name)
+ test_big_gtt(fd, 0, m->flags);
+
+ igt_subtest_f("big-cpu-%s", m->name)
+ test_big_cpu(fd, 1, m->flags);
+ igt_subtest_f("big-gtt-%s", m->name)
+ test_big_gtt(fd, 1, m->flags);
+
+ igt_subtest_f("huge-cpu-%s", m->name)
+ test_big_cpu(fd, 2, m->flags);
+ igt_subtest_f("huge-gtt-%s", m->name)
+ test_big_gtt(fd, 2, m->flags);
+ }
+ }
+
+ igt_fixture
+ close(fd);
+
+ igt_exit();
+}
diff --git a/tests/i915/gem_pwrite_pread.c b/tests/i915/gem_pwrite_pread.c
new file mode 100644
index 00000000..f91fc7c4
--- /dev/null
+++ b/tests/i915/gem_pwrite_pread.c
@@ -0,0 +1,410 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+#define OBJECT_SIZE 16384
+
+#define COPY_BLT_CMD (2<<29|0x53<<22)
+#define BLT_WRITE_ALPHA (1<<21)
+#define BLT_WRITE_RGB (1<<20)
+#define BLT_SRC_TILED (1<<15)
+#define BLT_DST_TILED (1<<11)
+
+uint32_t is_64bit;
+uint32_t exec_flags;
+
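+/*
+ * Build an XY_SRC_COPY_BLT-style batch that copies len bytes as a single
+ * row of 32bpp pixels (len/4 wide, 1 high): DW1 carries the 0xcc copy
+ * ROP, the colour-depth bits and the pitch, DW3 the destination extent,
+ * and the zeroed dst/src address dwords are patched by the relocations
+ * set up in BUILD_EXEC below (one extra dword each on gen8+ for 64-bit
+ * addresses). This description is inferred from the code, not quoted
+ * from the hardware documentation.
+ */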
+static inline void build_batch(uint32_t *batch, int len, uint32_t *batch_len)
+{
+ unsigned int i = 0;
+
+ batch[i++] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB | (is_64bit ? 8 : 6);
+ batch[i++] = 0xcc << 16 | 1 << 25 | 1 << 24 | len;
+ batch[i++] = 0;
+ batch[i++] = 1 << 16 | (len / 4);
+ batch[i++] = 0; /* dst */
+ if (is_64bit)
+ batch[i++] = 0;
+ batch[i++] = 0;
+ batch[i++] = len;
+ batch[i++] = 0; /* src */
+ if (is_64bit)
+ batch[i++] = 0;
+ batch[i++] = MI_BATCH_BUFFER_END;
+ batch[i++] = 0;
+
+ *batch_len = i * 4;
+}
+
+#define BUILD_EXEC \
+ uint32_t batch[12]; \
+ struct drm_i915_gem_relocation_entry reloc[] = { \
+ { dst, 0, 4*sizeof(uint32_t), 0, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER }, \
+ { src, 0, (is_64bit ? 8 : 7)*sizeof(uint32_t), 0, I915_GEM_DOMAIN_RENDER, 0 }, \
+ }; \
+ struct drm_i915_gem_exec_object2 exec[] = { \
+ { src }, \
+ { dst }, \
+ { gem_create(fd, 4096), 2, to_user_pointer(reloc) } \
+ }; \
+ struct drm_i915_gem_execbuffer2 execbuf = { \
+ to_user_pointer(exec), 3, \
+ 0, 0, \
+ 0, 0, 0, 0, \
+ exec_flags, \
+ }; \
+ build_batch(batch, len, &execbuf.batch_len); \
+ gem_write(fd, exec[2].handle, 0, batch, execbuf.batch_len);
+
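+/*
+ * BUILD_EXEC declares, on the caller's stack, an execbuf over three
+ * objects (src, dst and a freshly created 4KiB batch in exec[2]); the
+ * batch carries the two relocations that patch the blit's destination
+ * and source addresses, and exec_flags steers it onto the BLT ring when
+ * the device has one. Each caller gem_close()es exec[2].handle itself.
+ */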
+
+static void copy(int fd, uint32_t src, uint32_t dst, void *buf, int len, int loops)
+{
+ BUILD_EXEC;
+
+ while (loops--) {
+ gem_write(fd, src, 0, buf, len);
+ gem_execbuf(fd, &execbuf);
+ gem_read(fd, dst, 0, buf, len);
+ }
+
+ gem_close(fd, exec[2].handle);
+}
+
+static void as_gtt_mmap(int fd, uint32_t src, uint32_t dst, void *buf, int len, int loops)
+{
+ uint32_t *src_ptr, *dst_ptr;
+ BUILD_EXEC;
+
+ src_ptr = gem_mmap__gtt(fd, src, OBJECT_SIZE, PROT_WRITE);
+ dst_ptr = gem_mmap__gtt(fd, dst, OBJECT_SIZE, PROT_READ);
+
+ while (loops--) {
+ gem_set_domain(fd, src,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ memcpy(src_ptr, buf, len);
+
+ gem_execbuf(fd, &execbuf);
+ gem_set_domain(fd, dst,
+ I915_GEM_DOMAIN_GTT, 0);
+ memcpy(buf, dst_ptr, len);
+ }
+
+ munmap(dst_ptr, len);
+ munmap(src_ptr, len);
+ gem_close(fd, exec[2].handle);
+}
+
+
+static void as_cpu_mmap(int fd, uint32_t src, uint32_t dst, void *buf, int len, int loops)
+{
+ uint32_t *src_ptr, *dst_ptr;
+ BUILD_EXEC;
+
+ src_ptr = gem_mmap__cpu(fd, src, 0, OBJECT_SIZE, PROT_WRITE);
+ dst_ptr = gem_mmap__cpu(fd, dst, 0, OBJECT_SIZE, PROT_READ);
+
+ while (loops--) {
+ gem_set_domain(fd, src,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ memcpy(src_ptr, buf, len);
+
+ gem_execbuf(fd, &execbuf);
+ gem_set_domain(fd, dst,
+ I915_GEM_DOMAIN_CPU, 0);
+ memcpy(buf, dst_ptr, len);
+ }
+
+ munmap(dst_ptr, len);
+ munmap(src_ptr, len);
+ gem_close(fd, exec[2].handle);
+}
+
+static void test_copy(int fd, uint32_t src, uint32_t dst, uint32_t *buf, int len)
+{
+ int i;
+ BUILD_EXEC;
+
+ for (i = 0; i < len/4; i++)
+ buf[i] = i;
+
+ gem_write(fd, src, 0, buf, len);
+ memset(buf, 0, len);
+
+ gem_execbuf(fd, &execbuf);
+ gem_read(fd, dst, 0, buf, len);
+
+ gem_close(fd, exec[2].handle);
+
+ for (i = 0; i < len/4; i++)
+ igt_assert(buf[i] == i);
+}
+
+static void test_as_gtt_mmap(int fd, uint32_t src, uint32_t dst, int len)
+{
+ uint32_t *src_ptr, *dst_ptr;
+ int i;
+ BUILD_EXEC;
+
+ src_ptr = gem_mmap__gtt(fd, src, OBJECT_SIZE, PROT_WRITE);
+ dst_ptr = gem_mmap__gtt(fd, dst, OBJECT_SIZE, PROT_READ);
+
+ gem_set_domain(fd, src, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ for (i = 0; i < len/4; i++)
+ src_ptr[i] = i;
+
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, exec[2].handle);
+
+ gem_set_domain(fd, dst, I915_GEM_DOMAIN_GTT, 0);
+ for (i = 0; i < len/4; i++)
+ igt_assert(dst_ptr[i] == i);
+
+ munmap(dst_ptr, len);
+ munmap(src_ptr, len);
+}
+
+static void test_as_cpu_mmap(int fd, uint32_t src, uint32_t dst, int len)
+{
+ uint32_t *src_ptr, *dst_ptr;
+ int i;
+ BUILD_EXEC;
+
+ src_ptr = gem_mmap__cpu(fd, src, 0, OBJECT_SIZE, PROT_WRITE);
+ dst_ptr = gem_mmap__cpu(fd, dst, 0, OBJECT_SIZE, PROT_READ);
+
+ gem_set_domain(fd, src, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ for (i = 0; i < len/4; i++)
+ src_ptr[i] = i;
+
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, exec[2].handle);
+
+ gem_set_domain(fd, dst, I915_GEM_DOMAIN_CPU, 0);
+ for (i = 0; i < len/4; i++)
+ igt_assert(dst_ptr[i] == i);
+
+ munmap(dst_ptr, len);
+ munmap(src_ptr, len);
+}
+
+static double elapsed(const struct timeval *start,
+ const struct timeval *end,
+ int loop)
+{
+ return (1e6*(end->tv_sec - start->tv_sec) + (end->tv_usec - start->tv_usec))/loop;
+}
+
+static const char *bytes_per_sec(char *buf, double v)
+{
+ const char *order[] = {
+ "",
+ "KiB",
+ "MiB",
+ "GiB",
+ "TiB",
+ NULL,
+ }, **o = order;
+
+ while (v > 1000 && o[1]) {
+ v /= 1000;
+ o++;
+ }
+ sprintf(buf, "%.1f%s/s", v, *o);
+ return buf;
+}
+
+uint32_t *tmp, src, dst;
+int fd;
+
+int main(int argc, char **argv)
+{
+ int object_size = 0;
+ uint32_t buf[20];
+ int count;
+
+ igt_subtest_init(argc, argv);
+ igt_skip_on_simulation();
+
+ if (argc > 1)
+ object_size = atoi(argv[1]);
+ if (object_size == 0)
+ object_size = OBJECT_SIZE;
+ object_size = (object_size + 3) & -4;
+
+ igt_fixture {
+ uint32_t devid;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ dst = gem_create(fd, object_size);
+ src = gem_create(fd, object_size);
+ tmp = malloc(object_size);
+
+ gem_set_caching(fd, src, 0);
+ gem_set_caching(fd, dst, 0);
+
+ devid = intel_get_drm_devid(fd);
+ is_64bit = intel_gen(devid) >= 8;
+ exec_flags = HAS_BLT_RING(devid) ? I915_EXEC_BLT : 0;
+ }
+
+ igt_subtest("uncached-copy-correctness")
+ test_copy(fd, src, dst, tmp, object_size);
+ igt_subtest("uncached-copy-performance") {
+ for (count = 1; count <= 1<<17; count <<= 1) {
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+ copy(fd, src, dst, tmp, object_size, count);
+ gettimeofday(&end, NULL);
+ igt_info("Time to uncached copy %d bytes x %6d: %7.3fµs, %s\n",
+ object_size, count,
+ elapsed(&start, &end, count),
+ bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6));
+ fflush(stdout);
+ }
+ }
+
+ igt_subtest("uncached-pwrite-blt-gtt_mmap-correctness")
+ test_as_gtt_mmap(fd, src, dst, object_size);
+ igt_subtest("uncached-pwrite-blt-gtt_mmap-performance") {
+ for (count = 1; count <= 1<<17; count <<= 1) {
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+ as_gtt_mmap(fd, src, dst, tmp, object_size, count);
+ gettimeofday(&end, NULL);
+ igt_info("** mmap uncached copy %d bytes x %6d: %7.3fµs, %s\n",
+ object_size, count,
+ elapsed(&start, &end, count),
+ bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6));
+ fflush(stdout);
+ }
+ }
+
+ igt_fixture {
+ gem_set_caching(fd, src, 1);
+ gem_set_caching(fd, dst, 1);
+ }
+
+ igt_subtest("snooped-copy-correctness")
+ test_copy(fd, src, dst, tmp, object_size);
+ igt_subtest("snooped-copy-performance") {
+ for (count = 1; count <= 1<<17; count <<= 1) {
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+ copy(fd, src, dst, tmp, object_size, count);
+ gettimeofday(&end, NULL);
+ igt_info("Time to snooped copy %d bytes x %6d: %7.3fµs, %s\n",
+ object_size, count,
+ elapsed(&start, &end, count),
+ bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6));
+ fflush(stdout);
+ }
+ }
+
+ igt_subtest("snooped-pwrite-blt-cpu_mmap-correctness")
+ test_as_cpu_mmap(fd, src, dst, object_size);
+ igt_subtest("snooped-pwrite-blt-cpu_mmap-performance") {
+ for (count = 1; count <= 1<<17; count <<= 1) {
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+ as_cpu_mmap(fd, src, dst, tmp, object_size, count);
+ gettimeofday(&end, NULL);
+ igt_info("** mmap snooped copy %d bytes x %6d: %7.3fµs, %s\n",
+ object_size, count,
+ elapsed(&start, &end, count),
+ bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6));
+ fflush(stdout);
+ }
+ }
+
+ igt_fixture {
+ gem_set_caching(fd, src, 2);
+ gem_set_caching(fd, dst, 2);
+ }
+
+ igt_subtest("display-copy-correctness")
+ test_copy(fd, src, dst, tmp, object_size);
+ igt_subtest("display-copy-performance") {
+ for (count = 1; count <= 1<<17; count <<= 1) {
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+ copy(fd, src, dst, tmp, object_size, count);
+ gettimeofday(&end, NULL);
+ igt_info("Time to display copy %d bytes x %6d: %7.3fµs, %s\n",
+ object_size, count,
+ elapsed(&start, &end, count),
+ bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6));
+ fflush(stdout);
+ }
+ }
+
+ igt_subtest("display-pwrite-blt-gtt_mmap-correctness")
+ test_as_gtt_mmap(fd, src, dst, object_size);
+ igt_subtest("display-pwrite-blt-gtt_mmap-performance") {
+ for (count = 1; count <= 1<<17; count <<= 1) {
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+ as_gtt_mmap(fd, src, dst, tmp, object_size, count);
+ gettimeofday(&end, NULL);
+ igt_info("** mmap display copy %d bytes x %6d: %7.3fµs, %s\n",
+ object_size, count,
+ elapsed(&start, &end, count),
+ bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6));
+ fflush(stdout);
+ }
+ }
+
+ igt_fixture {
+ free(tmp);
+ gem_close(fd, src);
+ gem_close(fd, dst);
+
+ close(fd);
+ }
+
+ igt_exit();
+}
diff --git a/tests/i915/gem_pwrite_snooped.c b/tests/i915/gem_pwrite_snooped.c
new file mode 100644
index 00000000..47ccce8b
--- /dev/null
+++ b/tests/i915/gem_pwrite_snooped.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION(
+ "pwrite to a snooped bo then make it uncached and check that the GPU sees the data.");
+
+static int fd;
+static uint32_t devid;
+static drm_intel_bufmgr *bufmgr;
+
+static void blit(drm_intel_bo *dst, drm_intel_bo *src,
+ unsigned int width, unsigned int height,
+ unsigned int dst_pitch, unsigned int src_pitch)
+{
+ struct intel_batchbuffer *batch;
+
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+ igt_assert(batch);
+
+ BLIT_COPY_BATCH_START(0);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ dst_pitch);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH(height << 16 | width);
+ OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH(src_pitch);
+ OUT_RELOC_FENCED(src, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+
+ if (batch->gen >= 6) {
+ BEGIN_BATCH(3, 0);
+ OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
+
+ intel_batchbuffer_flush(batch);
+ intel_batchbuffer_free(batch);
+}
+
+static void *memchr_inv(const void *s, int c, size_t n)
+{
+ const uint8_t *us = s;
+ const uint8_t uc = c;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+ while (n--) {
+ if (*us != uc)
+ return (void *) us;
+ us++;
+ }
+#pragma GCC diagnostic pop
+
+ return NULL;
+}
+
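+/*
+ * Fill a snooped bo with 0xff via pwrite, flip it back to uncached, blit
+ * it into a second bo and read that back: any byte that is not 0xff
+ * means the GPU sampled stale data, i.e. the snooped pwrite never became
+ * visible to it.
+ */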
+static void test(int w, int h)
+{
+ int object_size = w * h * 4;
+ drm_intel_bo *src, *dst;
+ void *buf;
+
+ src = drm_intel_bo_alloc(bufmgr, "src", object_size, 4096);
+ igt_assert(src);
+ dst = drm_intel_bo_alloc(bufmgr, "dst", object_size, 4096);
+ igt_assert(dst);
+
+ buf = malloc(object_size);
+ igt_assert(buf);
+ memset(buf, 0xff, object_size);
+
+ gem_set_domain(fd, src->handle, I915_GEM_DOMAIN_GTT,
+ I915_GEM_DOMAIN_GTT);
+
+ gem_set_caching(fd, src->handle, I915_CACHING_CACHED);
+
+ gem_write(fd, src->handle, 0, buf, object_size);
+
+ gem_set_caching(fd, src->handle, I915_CACHING_NONE);
+
+	blit(dst, src, w, h, w * 4, w * 4);
+
+ memset(buf, 0x00, object_size);
+ gem_read(fd, dst->handle, 0, buf, object_size);
+
+ igt_assert(memchr_inv(buf, 0xff, object_size) == NULL);
+}
+
+igt_simple_main
+{
+ igt_skip_on_simulation();
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ devid = intel_get_drm_devid(fd);
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+
+ test(256, 256);
+
+ drm_intel_bufmgr_destroy(bufmgr);
+ close(fd);
+}
diff --git a/tests/i915/gem_read_read_speed.c b/tests/i915/gem_read_read_speed.c
new file mode 100644
index 00000000..3dcf440c
--- /dev/null
+++ b/tests/i915/gem_read_read_speed.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file gem_read_read_speed.c
+ *
+ * This is a test of performance with multiple readers from the same source.
+ */
+
+#include "igt.h"
+#include "igt_sysfs.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+
+#include <drm.h>
+
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("Test speed of concurrent reads between engines.");
+
+igt_render_copyfunc_t rendercopy;
+struct intel_batchbuffer *batch;
+int width, height;
+
+static drm_intel_bo *rcs_copy_bo(drm_intel_bo *dst, drm_intel_bo *src)
+{
+ struct igt_buf d = {
+ .bo = dst,
+ .size = width * height * 4,
+ .num_tiles = width * height * 4,
+ .stride = width * 4,
+ }, s = {
+ .bo = src,
+ .size = width * height * 4,
+ .num_tiles = width * height * 4,
+ .stride = width * 4,
+ };
+ uint32_t swizzle;
+ drm_intel_bo *bo = batch->bo;
+ drm_intel_bo_reference(bo);
+
+ drm_intel_bo_get_tiling(dst, &d.tiling, &swizzle);
+ drm_intel_bo_get_tiling(src, &s.tiling, &swizzle);
+
+ rendercopy(batch, NULL,
+ &s, 0, 0,
+ width, height,
+ &d, 0, 0);
+
+ return bo;
+}
+
+static drm_intel_bo *bcs_copy_bo(drm_intel_bo *dst, drm_intel_bo *src)
+{
+ drm_intel_bo *bo = batch->bo;
+ drm_intel_bo_reference(bo);
+
+ intel_blt_copy(batch,
+ src, 0, 0, 4*width,
+ dst, 0, 0, 4*width,
+ width, height, 32);
+
+ return bo;
+}
+
+static void
+set_bo(drm_intel_bo *bo, uint32_t val)
+{
+ int size = width * height;
+ uint32_t *vaddr;
+
+ do_or_die(drm_intel_bo_map(bo, 1));
+ vaddr = bo->virtual;
+ while (size--)
+ *vaddr++ = val;
+ drm_intel_bo_unmap(bo);
+}
+
+static double elapsed(const struct timespec *start,
+ const struct timespec *end,
+ int loop)
+{
+ return (1e6*(end->tv_sec - start->tv_sec) + (end->tv_nsec - start->tv_nsec)/1000)/loop;
+}
+
+static drm_intel_bo *create_bo(drm_intel_bufmgr *bufmgr,
+ const char *name)
+{
+ uint32_t tiling_mode = I915_TILING_X;
+ unsigned long pitch;
+ return drm_intel_bo_alloc_tiled(bufmgr, name,
+ width, height, 4,
+ &tiling_mode, &pitch, 0);
+}
+
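+/*
+ * Queue a render-copy and a blitter-copy that both touch the shared
+ * "src" bo; write_bcs/write_rcs choose whether each engine writes to or
+ * reads from it, so the read-read case measures how well both engines
+ * can sample the same buffer concurrently, while the mixed and
+ * write-write cases show the cost of the extra serialisation.
+ */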
+static void run(drm_intel_bufmgr *bufmgr, int _width, int _height,
+ bool write_bcs, bool write_rcs)
+{
+ drm_intel_bo *src = NULL, *bcs = NULL, *rcs = NULL;
+ drm_intel_bo *bcs_batch, *rcs_batch;
+ struct timespec start, end;
+ int loops = 1000;
+
+ width = _width;
+ height = _height;
+
+ src = create_bo(bufmgr, "src");
+ bcs = create_bo(bufmgr, "bcs");
+ rcs = create_bo(bufmgr, "rcs");
+
+ set_bo(src, 0xdeadbeef);
+
+ if (write_bcs) {
+ bcs_batch = bcs_copy_bo(src, bcs);
+ } else {
+ bcs_batch = bcs_copy_bo(bcs, src);
+ }
+ if (write_rcs) {
+ rcs_batch = rcs_copy_bo(src, rcs);
+ } else {
+ rcs_batch = rcs_copy_bo(rcs, src);
+ }
+
+ drm_intel_bo_unreference(rcs);
+ drm_intel_bo_unreference(bcs);
+
+ drm_intel_gem_bo_start_gtt_access(src, true);
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ for (int i = 0; i < loops; i++) {
+ drm_intel_gem_bo_context_exec(rcs_batch, NULL, 4096, I915_EXEC_RENDER);
+ drm_intel_gem_bo_context_exec(bcs_batch, NULL, 4096, I915_EXEC_BLT);
+ }
+ drm_intel_gem_bo_start_gtt_access(src, true);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+
+ igt_info("Time to %s-%s %dx%d [%dk]: %7.3fµs\n",
+ write_bcs ? "write" : "read",
+ write_rcs ? "write" : "read",
+ width, height, 4*width*height/1024,
+ elapsed(&start, &end, loops));
+
+ drm_intel_bo_unreference(rcs_batch);
+ drm_intel_bo_unreference(bcs_batch);
+
+ drm_intel_bo_unreference(src);
+}
+
+igt_main
+{
+ const int sizes[] = {1, 128, 256, 512, 1024, 2048, 4096, 8192, 0};
+ drm_intel_bufmgr *bufmgr = NULL;
+ int fd, i;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ int devid;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ devid = intel_get_drm_devid(fd);
+ igt_require(intel_gen(devid) >= 6);
+
+ rendercopy = igt_get_render_copyfunc(devid);
+ igt_require(rendercopy);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ igt_assert(bufmgr);
+
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+ gem_submission_print_method(fd);
+ }
+
+ for (i = 0; sizes[i] != 0; i++) {
+ igt_subtest_f("read-read-%dx%d", sizes[i], sizes[i])
+ run(bufmgr, sizes[i], sizes[i], false, false);
+ igt_subtest_f("read-write-%dx%d", sizes[i], sizes[i])
+ run(bufmgr, sizes[i], sizes[i], false, true);
+ igt_subtest_f("write-read-%dx%d", sizes[i], sizes[i])
+ run(bufmgr, sizes[i], sizes[i], true, false);
+ igt_subtest_f("write-write-%dx%d", sizes[i], sizes[i])
+ run(bufmgr, sizes[i], sizes[i], true, true);
+ }
+}
diff --git a/tests/i915/gem_readwrite.c b/tests/i915/gem_readwrite.c
new file mode 100644
index 00000000..fdf279f1
--- /dev/null
+++ b/tests/i915/gem_readwrite.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+#define OBJECT_SIZE 16384
+
+static int
+do_read(int fd, int handle, void *buf, int offset, int size)
+{
+ struct drm_i915_gem_pread gem_pread;
+
+ /* Ensure that we don't have any convenient data in buf in case
+ * we fail.
+ */
+ memset(buf, 0xd0, size);
+
+ memset(&gem_pread, 0, sizeof(gem_pread));
+ gem_pread.handle = handle;
+ gem_pread.data_ptr = to_user_pointer(buf);
+ gem_pread.size = size;
+ gem_pread.offset = offset;
+
+ return ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &gem_pread);
+}
+
+static int
+do_write(int fd, int handle, void *buf, int offset, int size)
+{
+ struct drm_i915_gem_pwrite gem_pwrite;
+
+ memset(&gem_pwrite, 0, sizeof(gem_pwrite));
+ gem_pwrite.handle = handle;
+ gem_pwrite.data_ptr = to_user_pointer(buf);
+ gem_pwrite.size = size;
+ gem_pwrite.offset = offset;
+
+ return ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &gem_pwrite);
+}
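+
+/*
+ * Both wrappers call the raw ioctls so the subtests can check the exact
+ * return value and errno (EINVAL for out-of-bounds ranges, ENOENT for a
+ * bad handle) instead of asserting success the way the gem_read()/
+ * gem_write() library helpers do.
+ */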
+
+int fd;
+uint32_t handle;
+
+igt_main
+{
+ uint8_t expected[OBJECT_SIZE];
+ uint8_t buf[OBJECT_SIZE];
+ int ret;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ handle = gem_create(fd, OBJECT_SIZE);
+ }
+
+ igt_subtest("new-obj") {
+ igt_info("Testing contents of newly created object.\n");
+ ret = do_read(fd, handle, buf, 0, OBJECT_SIZE);
+ igt_assert(ret == 0);
+ memset(&expected, 0, sizeof(expected));
+ igt_assert(memcmp(expected, buf, sizeof(expected)) == 0);
+ }
+
+ igt_subtest("beyond-EOB") {
+ igt_info("Testing read beyond end of buffer.\n");
+ ret = do_read(fd, handle, buf, OBJECT_SIZE / 2, OBJECT_SIZE);
+ igt_assert(ret == -1 && errno == EINVAL);
+ }
+
+ igt_subtest("read-write") {
+ igt_info("Testing full write of buffer\n");
+		memset(buf, 0, sizeof(buf));
+		memset(expected, 0, sizeof(expected));
+		memset(buf + 1024, 0x01, 1024);
+		memset(expected + 1024, 0x01, 1024);
+ ret = do_write(fd, handle, buf, 0, OBJECT_SIZE);
+ igt_assert(ret == 0);
+ ret = do_read(fd, handle, buf, 0, OBJECT_SIZE);
+ igt_assert(ret == 0);
+ igt_assert(memcmp(buf, expected, sizeof(buf)) == 0);
+
+ igt_info("Testing partial write of buffer\n");
+ memset(buf + 4096, 0x02, 1024);
+ memset(expected + 4096, 0x02, 1024);
+ ret = do_write(fd, handle, buf + 4096, 4096, 1024);
+ igt_assert(ret == 0);
+ ret = do_read(fd, handle, buf, 0, OBJECT_SIZE);
+ igt_assert(ret == 0);
+ igt_assert(memcmp(buf, expected, sizeof(buf)) == 0);
+
+ igt_info("Testing partial read of buffer\n");
+ ret = do_read(fd, handle, buf, 512, 1024);
+ igt_assert(ret == 0);
+ igt_assert(memcmp(buf, expected + 512, 1024) == 0);
+ }
+
+ igt_subtest("read-bad-handle") {
+ igt_info("Testing read of bad buffer handle\n");
+ ret = do_read(fd, 1234, buf, 0, 1024);
+ igt_assert(ret == -1 && errno == ENOENT);
+ }
+
+ igt_subtest("write-bad-handle") {
+ igt_info("Testing write of bad buffer handle\n");
+ ret = do_write(fd, 1234, buf, 0, 1024);
+ igt_assert(ret == -1 && errno == ENOENT);
+ }
+
+ igt_fixture
+ close(fd);
+}
diff --git a/tests/i915/gem_reg_read.c b/tests/i915/gem_reg_read.c
new file mode 100644
index 00000000..79facc1f
--- /dev/null
+++ b/tests/i915/gem_reg_read.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+#include "igt.h"
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/utsname.h>
+#include <time.h>
+
+
+static bool is_x86_64;
+static bool has_proper_timestamp;
+
+struct local_drm_i915_reg_read {
+ __u64 offset;
+ __u64 val; /* Return value */
+};
+
+#define REG_READ_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x31, struct local_drm_i915_reg_read)
+
+#define RENDER_RING_TIMESTAMP 0x2358
+
+static int read_register(int fd, uint64_t offset, uint64_t * val)
+{
+ int ret = 0;
+ struct local_drm_i915_reg_read reg_read;
+ reg_read.offset = offset;
+
+ if (drmIoctl(fd, REG_READ_IOCTL, &reg_read))
+ ret = -errno;
+
+ *val = reg_read.val;
+
+ return ret;
+}
+
+static bool check_kernel_x86_64(void)
+{
+ int ret;
+ struct utsname uts;
+
+ ret = uname(&uts);
+ igt_assert_eq(ret, 0);
+
+ if (!strcmp(uts.machine, "x86_64"))
+ return true;
+
+ return false;
+}
+
+static bool check_timestamp(int fd)
+{
+ int ret;
+ uint64_t val;
+
+ ret = read_register(fd, RENDER_RING_TIMESTAMP | 1, &val);
+
+ return ret == 0;
+}
+
+static int timer_query(int fd, uint64_t * val)
+{
+ uint64_t offset;
+ int ret;
+
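+	/* Bit 0 of the offset is taken here to request the kernel's
+	 * fixed-up 64-bit timestamp readback, as probed by
+	 * check_timestamp(); without it we fall back to the shifted-value
+	 * workaround below. This reading follows the probe above rather
+	 * than any documented ABI flag. */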
+ offset = RENDER_RING_TIMESTAMP;
+ if (has_proper_timestamp)
+ offset |= 1;
+
+ ret = read_register(fd, offset, val);
+
+/*
+ * When reading the timestamp register with a single 64b read, we observe
+ * invalid values on x86_64:
+ *
+ * [f = valid counter value | X = garbage]
+ *
+ * i386: 0x0000000fffffffff
+ * x86_64: 0xffffffffXXXXXXXX
+ *
+ * In the absence of a corrected register read ioctl, attempt
+ * to fix up the return value to be vaguely useful.
+ */
+
+ if (is_x86_64 && !has_proper_timestamp)
+ *val >>= 32;
+
+ return ret;
+}
+
+static void test_timestamp_moving(int fd)
+{
+ uint64_t first_val, second_val;
+
+ igt_fail_on(timer_query(fd, &first_val) != 0);
+ sleep(1);
+ igt_fail_on(timer_query(fd, &second_val) != 0);
+ igt_assert(second_val != first_val);
+}
+
+static void test_timestamp_monotonic(int fd)
+{
+ uint64_t first_val, second_val;
+ time_t start;
+ bool retry = true;
+
+ igt_fail_on(timer_query(fd, &first_val) != 0);
+ time(&start);
+ do {
+retry:
+ igt_fail_on(timer_query(fd, &second_val) != 0);
+ if (second_val < first_val && retry) {
+ /* We may hit timestamp overflow once */
+ retry = false;
+ first_val = second_val;
+ goto retry;
+ }
+ igt_assert(second_val >= first_val);
+ } while(difftime(time(NULL), start) < 5);
+
+}
+
+igt_main
+{
+ uint64_t val = 0;
+ int fd = -1;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ is_x86_64 = check_kernel_x86_64();
+ has_proper_timestamp = check_timestamp(fd);
+ }
+
+ igt_subtest("bad-register")
+ igt_assert_eq(read_register(fd, 0x12345678, &val), -EINVAL);
+
+ igt_subtest("timestamp-moving") {
+ igt_skip_on(timer_query(fd, &val) != 0);
+ test_timestamp_moving(fd);
+ }
+
+ igt_subtest("timestamp-monotonic") {
+ igt_skip_on(timer_query(fd, &val) != 0);
+ test_timestamp_monotonic(fd);
+ }
+
+ igt_fixture {
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_reloc_overflow.c b/tests/i915/gem_reloc_overflow.c
new file mode 100644
index 00000000..c9d1f207
--- /dev/null
+++ b/tests/i915/gem_reloc_overflow.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright © 2013 Google
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Kees Cook <keescook@chromium.org>
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ * Rafael Barbalho <rafael.barbalho@intel.com>
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <unistd.h>
+#include <malloc.h>
+#include <limits.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION("Check that kernel relocation overflows are caught.");
+
+/*
+ * Testcase: Kernel relocation overflows are caught.
+ */
+
+int fd, entries, num;
+struct drm_i915_gem_exec_object2 *obj;
+struct drm_i915_gem_execbuffer2 execbuf;
+struct drm_i915_gem_relocation_entry *reloc;
+
+static uint32_t target_handle(void)
+{
+ return execbuf.flags & I915_EXEC_HANDLE_LUT ? 0 : obj[0].handle;
+}
+
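+/*
+ * Probe the boundary conditions for a relocation's source offset within
+ * the batch object (evidently 8KiB, given the offsets used below): on
+ * gen8+ a relocation writes 8 bytes, so 8192-8 is the last valid slot
+ * and 8192-4 must be rejected, while pre-gen8 writes 4 bytes, so 8192-4
+ * is valid and 8192 is not; negative and unaligned offsets must always
+ * fail with -EINVAL.
+ */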
+static void source_offset_tests(int devid, bool reloc_gtt)
+{
+ struct drm_i915_gem_relocation_entry single_reloc;
+ const char *relocation_type;
+
+ if (reloc_gtt)
+ relocation_type = "reloc-gtt";
+ else
+ relocation_type = "reloc-cpu";
+
+ igt_fixture {
+ obj[1].relocation_count = 0;
+ obj[1].relocs_ptr = 0;
+
+ obj[0].relocation_count = 1;
+ obj[0].relocs_ptr = to_user_pointer(&single_reloc);
+ execbuf.buffer_count = 2;
+
+ if (reloc_gtt) {
+ gem_set_domain(fd, obj[0].handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ relocation_type = "reloc-gtt";
+ } else {
+ gem_set_domain(fd, obj[0].handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ relocation_type = "reloc-cpu";
+ }
+ }
+
+ /* Special tests for 64b relocs. */
+ igt_subtest_f("source-offset-page-stradle-gen8-%s", relocation_type) {
+ igt_require(intel_gen(devid) >= 8);
+ single_reloc.offset = 4096 - 4;
+ single_reloc.delta = 0;
+ single_reloc.target_handle = target_handle();
+ single_reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+ single_reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+ single_reloc.presumed_offset = -1;
+ gem_execbuf(fd, &execbuf);
+
+ single_reloc.delta = 1024;
+ gem_execbuf(fd, &execbuf);
+ }
+
+ igt_subtest_f("source-offset-end-gen8-%s", relocation_type) {
+ igt_require(intel_gen(devid) >= 8);
+ single_reloc.offset = 8192 - 8;
+ single_reloc.delta = 0;
+ single_reloc.target_handle = target_handle();
+ single_reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+ single_reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+ single_reloc.presumed_offset = -1;
+ gem_execbuf(fd, &execbuf);
+ }
+
+ igt_subtest_f("source-offset-overflow-gen8-%s", relocation_type) {
+ igt_require(intel_gen(devid) >= 8);
+ single_reloc.offset = 8192 - 4;
+ single_reloc.delta = 0;
+ single_reloc.target_handle = target_handle();
+ single_reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+ single_reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+ single_reloc.presumed_offset = -1;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+ }
+
+ /* Tests for old 4byte relocs on pre-gen8. */
+ igt_subtest_f("source-offset-end-%s", relocation_type) {
+ igt_require(intel_gen(devid) < 8);
+ single_reloc.offset = 8192 - 4;
+ single_reloc.delta = 0;
+ single_reloc.target_handle = target_handle();
+ single_reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+ single_reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+ single_reloc.presumed_offset = -1;
+ gem_execbuf(fd, &execbuf);
+ }
+
+ igt_subtest_f("source-offset-big-%s", relocation_type) {
+ single_reloc.offset = 8192;
+ single_reloc.delta = 0;
+ single_reloc.target_handle = target_handle();
+ single_reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+ single_reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+ single_reloc.presumed_offset = -1;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+ }
+
+ igt_subtest_f("source-offset-negative-%s", relocation_type) {
+ single_reloc.offset = (int64_t) -4;
+ single_reloc.delta = 0;
+ single_reloc.target_handle = target_handle();
+ single_reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+ single_reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+ single_reloc.presumed_offset = -1;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+ }
+
+ igt_subtest_f("source-offset-unaligned-%s", relocation_type) {
+ single_reloc.offset = 1;
+ single_reloc.delta = 0;
+ single_reloc.target_handle = target_handle();
+ single_reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+ single_reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+ single_reloc.presumed_offset = -1;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+ }
+}
+
+static void reloc_tests(const char *suffix)
+{
+ uint64_t max_relocations;
+ int i;
+
+ max_relocations = min(ULONG_MAX, SIZE_MAX);
+ max_relocations /= sizeof(struct drm_i915_gem_relocation_entry);
+ igt_debug("Maximum allocable relocations: %'llu\n",
+ (long long)max_relocations);
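+	/* With the 32-byte struct drm_i915_gem_relocation_entry this is
+	 * SIZE_MAX / 32, roughly 2^59 entries on a 64-bit build. */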
+
+ igt_subtest_f("invalid-address%s", suffix) {
+ /* Attempt unmapped single entry. */
+ obj[0].relocation_count = 1;
+ obj[0].relocs_ptr = 0;
+ execbuf.buffer_count = 1;
+
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT);
+ }
+
+ igt_subtest_f("single-fault%s", suffix) {
+ obj[0].relocation_count = entries + 1;
+ execbuf.buffer_count = 1;
+
+ /* out-of-bounds after */
+ obj[0].relocs_ptr = to_user_pointer(reloc);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT);
+
+ /* out-of-bounds before */
+ obj[0].relocs_ptr = to_user_pointer((reloc - 1));
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT);
+ }
+
+ igt_fixture {
+ obj[0].relocation_count = 0;
+ obj[0].relocs_ptr = 0;
+
+ execbuf.buffer_count = 1;
+
+ /* Make sure the batch would succeed except for the thing we're
+ * testing. */
+ igt_require(__gem_execbuf(fd, &execbuf) == 0);
+ }
+
+ igt_subtest_f("batch-start-unaligned%s", suffix) {
+ execbuf.batch_start_offset = 1;
+ execbuf.batch_len = 8;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+ }
+
+ igt_subtest_f("batch-end-unaligned%s", suffix) {
+ execbuf.batch_start_offset = 0;
+ execbuf.batch_len = 7;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+ }
+
+ igt_subtest_f("batch-both-unaligned%s", suffix) {
+ execbuf.batch_start_offset = 1;
+ execbuf.batch_len = 7;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+ }
+
+ igt_fixture {
+ /* Undo damage for next tests. */
+ execbuf.batch_start_offset = 0;
+ execbuf.batch_len = 0;
+ igt_require(__gem_execbuf(fd, &execbuf) == 0);
+ }
+
+ igt_subtest_f("single-overflow%s", suffix) {
+ if (*suffix) {
+ igt_require_f(intel_get_avail_ram_mb() >
+ sizeof(struct drm_i915_gem_relocation_entry) * entries / (1024*1024),
+ "Test requires at least %'llu MiB, but only %'llu MiB of RAM available\n",
+ (long long)sizeof(struct drm_i915_gem_relocation_entry) * entries / (1024*1024),
+ (long long)intel_get_avail_ram_mb());
+ }
+
+ obj[0].relocs_ptr = to_user_pointer(reloc);
+ obj[0].relocation_count = entries;
+ execbuf.buffer_count = 1;
+ gem_execbuf(fd, &execbuf);
+
+ /* Attempt single overflowed entry. */
+ obj[0].relocation_count = -1;
+ igt_debug("relocation_count=%u\n",
+ obj[0].relocation_count);
+ if (max_relocations <= obj[0].relocation_count)
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+ else
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT);
+
+ if (max_relocations + 1 < obj[0].relocation_count) {
+ obj[0].relocation_count = max_relocations + 1;
+ igt_debug("relocation_count=%u\n",
+ obj[0].relocation_count);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+
+ obj[0].relocation_count = max_relocations - 1;
+ igt_debug("relocation_count=%u\n",
+ obj[0].relocation_count);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT);
+ }
+ }
+
+ igt_subtest_f("wrapped-overflow%s", suffix) {
+ if (*suffix) {
+ igt_require_f(intel_get_avail_ram_mb() >
+ sizeof(struct drm_i915_gem_relocation_entry) * entries * num / (1024*1024),
+ "Test requires at least %'llu MiB, but only %'llu MiB of RAM available\n",
+ (long long)sizeof(struct drm_i915_gem_relocation_entry) * entries * num / (1024*1024),
+ (long long)intel_get_avail_ram_mb());
+ }
+
+ for (i = 0; i < num; i++) {
+ struct drm_i915_gem_exec_object2 *o = &obj[i];
+
+ o->relocs_ptr = to_user_pointer(reloc);
+ o->relocation_count = entries;
+ }
+ execbuf.buffer_count = i;
+ gem_execbuf(fd, &execbuf);
+
+ obj[i-1].relocation_count = -1;
+ igt_debug("relocation_count[%d]=%u\n",
+ i-1, obj[i-1].relocation_count);
+ if (max_relocations <= obj[i-1].relocation_count)
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+ else
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT);
+
+ if (max_relocations < obj[i-1].relocation_count) {
+ obj[i-1].relocation_count = max_relocations;
+ igt_debug("relocation_count[%d]=%u\n",
+ i-1, obj[i-1].relocation_count);
+ /* Whether the kernel reports the EFAULT for the
+ * invalid relocation array or EINVAL for the overflow
+ * in array size depends upon the order of the
+ * individual tests. From a consistency perspective
+ * EFAULT is preferred (i.e. using that relocation
+ * array by itself would cause EFAULT not EINVAL).
+ */
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT);
+
+ obj[i-1].relocation_count = max_relocations - 1;
+ igt_debug("relocation_count[%d]=%u\n",
+ i-1, obj[i-1].relocation_count);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT);
+ }
+
+ obj[i-1].relocation_count = entries + 1;
+ igt_debug("relocation_count[%d]=%u\n",
+ i-1, obj[i-1].relocation_count);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT);
+
+ obj[0].relocation_count = -1;
+ if (max_relocations < obj[0].relocation_count) {
+ execbuf.buffer_count = 1;
+ gem_execbuf(fd, &execbuf);
+
+ /* As outlined above, this is why EFAULT is preferred */
+ obj[0].relocation_count = max_relocations;
+ igt_debug("relocation_count[0]=%u\n",
+ obj[0].relocation_count);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EFAULT);
+ }
+ }
+}
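+
+/*
+ * For illustration of the overflow handling above (on a typical 64-bit
+ * kernel): relocation_count is a u32, so -1 is stored as UINT_MAX, which is
+ * still far below max_relocations.  The count is therefore accepted and the
+ * kernel only fails with -EFAULT once it walks past the PROT_NONE guard page
+ * that follows the mmapped relocation array.  Only when UINT_MAX relocations
+ * cannot even be sized (e.g. a 32-bit size_t) is -EINVAL the expected error,
+ * which is exactly the branch the subtests above select between.
+ */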
+
+static void buffer_count_tests(void)
+{
+ igt_subtest("buffercount-overflow") {
+ igt_skip_on(SIZE_MAX / sizeof(*obj) >= UINT_MAX);
+
+ for (int i = 0; i < num; i++) {
+ obj[i].relocation_count = 0;
+ obj[i].relocs_ptr = 0;
+ }
+
+ /* We only have num buffers actually, but the overflow will make
+ * sure we blow up the kernel before we blow up userspace. */
+ execbuf.buffer_count = num;
+
+ /* Make sure the basic thing would work first ... */
+ gem_execbuf(fd, &execbuf);
+
+ /* ... then be evil: overflow the pointer table (which has a
+ * few bytes of leading data structures, so no +1 is needed to overflow). */
+ execbuf.buffer_count = INT_MAX / sizeof(void *);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+
+ /* ... then be evil: Copying/allocating the array. */
+ execbuf.buffer_count = UINT_MAX / sizeof(obj[0]) + 1;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+ }
+}
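+
+/*
+ * Worked example for the two evil buffer_count values above, assuming the
+ * usual 64-bit sizes: INT_MAX / sizeof(void *) pointer-table entries come to
+ * just under 2 GiB, so the leading data structures push the allocation past
+ * INT_MAX; and with a 56-byte exec object, UINT_MAX / 56 + 1 entries
+ * multiplied back by the object size wraps a 32-bit byte count.  In both
+ * cases the kernel must notice the overflow and return -EINVAL rather than
+ * trust the wrapped size.
+ */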
+
+igt_main
+{
+ int devid = 0;
+
+ igt_fixture {
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+ size_t reloc_size;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ devid = intel_get_drm_devid(fd);
+
+ /* Create giant reloc buffer area. */
+ num = 257;
+ entries = ((1ULL << 32) / (num - 1));
+ reloc_size = entries * sizeof(struct drm_i915_gem_relocation_entry);
+ igt_assert((reloc_size & 4095) == 0);
+ reloc = mmap(NULL, reloc_size + 2*4096, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ igt_assert(reloc != MAP_FAILED);
+ igt_require_f(mlock(reloc, reloc_size) == 0,
+ "Tests require at least %'llu MiB of available memory\n",
+ (long long unsigned)reloc_size / (1024*1024));
+
+ /* disable access before + after */
+ mprotect(reloc, 4096, 0);
+ reloc = (struct drm_i915_gem_relocation_entry *)((char *)reloc + 4096);
+ mprotect(reloc + entries, 4096, 0);
+
+ /* Allocate the handles we'll need to wrap. */
+ intel_require_memory(num+1, 4096, CHECK_RAM);
+ obj = calloc(num, sizeof(*obj));
+ igt_assert(obj);
+
+ /* First object is used for page crossing tests */
+ obj[0].handle = gem_create(fd, 8192);
+ gem_write(fd, obj[0].handle, 0, &bbe, sizeof(bbe));
+ for (int i = 1; i < num; i++) {
+ obj[i].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[i].handle, 0, &bbe, sizeof(bbe));
+ }
+
+ /* Create relocation objects. */
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = I915_EXEC_HANDLE_LUT;
+ if (__gem_execbuf(fd, &execbuf))
+ execbuf.flags = 0;
+
+ for (int i = 0; i < entries; i++) {
+ reloc[i].target_handle = target_handle();
+ reloc[i].offset = 1024;
+ reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[i].write_domain = 0;
+ }
+ }
+
+ reloc_tests("");
+ igt_fixture
+ igt_disable_prefault();
+ reloc_tests("-noprefault");
+ igt_fixture
+ igt_enable_prefault();
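+
+ /* The "-noprefault" pass above reruns the same relocation subtests with
+ * prefaulting of user pages disabled via igt_disable_prefault(), so
+ * execbuf has to fault the relocation arrays in on demand rather than
+ * finding them already resident.
+ */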
+
+ source_offset_tests(devid, false);
+ source_offset_tests(devid, true);
+
+ buffer_count_tests();
+}
diff --git a/tests/i915/gem_reloc_vs_gpu.c b/tests/i915/gem_reloc_vs_gpu.c
new file mode 100644
index 00000000..d421e434
--- /dev/null
+++ b/tests/i915/gem_reloc_vs_gpu.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright © 2011,2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <signal.h>
+#include <sys/wait.h>
+
+#include <drm.h>
+
+
+IGT_TEST_DESCRIPTION("Test kernel relocations vs. gpu races.");
+
+/*
+ * Testcase: Kernel relocations vs. gpu races
+ *
+ */
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+uint32_t blob[2048*2048];
+#define NUM_TARGET_BOS 16
+drm_intel_bo *pc_target_bo[NUM_TARGET_BOS];
+drm_intel_bo *dummy_bo;
+drm_intel_bo *special_bo;
+uint32_t devid;
+int special_reloc_ofs;
+int special_batch_len;
+
+static void create_special_bo(void)
+{
+ uint32_t data[1024];
+ int len = 0;
+ int small_pitch = 64;
+#define BATCH(dw) data[len++] = (dw);
+
+ memset(data, 0, 4096);
+ special_bo = drm_intel_bo_alloc(bufmgr, "special batch", 4096, 4096);
+
+ if (intel_gen(devid) >= 8) {
+ BATCH(MI_NOOP);
+ BATCH(XY_COLOR_BLT_CMD_NOLEN | 5 |
+ COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB);
+ } else {
+ BATCH(XY_COLOR_BLT_CMD_NOLEN | 4 |
+ COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB);
+ }
+
+ BATCH((3 << 24) | (0xf0 << 16) | small_pitch);
+ BATCH(0);
+ BATCH(1 << 16 | 1);
+ special_reloc_ofs = 4*len;
+ BATCH(0);
+ if (intel_gen(devid) >= 8)
+ BATCH(0);
+ BATCH(0xdeadbeef);
+
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+ /* batchbuffer end */
+ if (IS_GEN5(batch->devid)) {
+ BATCH(CMD_POLY_STIPPLE_OFFSET << 16);
+ BATCH(0);
+ }
+ igt_assert_eq(len % 2, 0);
+ BATCH(MI_NOOP);
+ BATCH(MI_BATCH_BUFFER_END);
+
+ drm_intel_bo_subdata(special_bo, 0, 4096, data);
+ special_batch_len = len*4;
+}
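+
+/*
+ * The batch built above is a single XY_COLOR_BLT that fills a 1x1 region
+ * with the immediate colour 0xdeadbeef; the relocation at special_reloc_ofs
+ * decides which buffer that write lands in, and do_test() later checks that
+ * every target bo really received 0xdeadbeef at offset 0.
+ */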
+
+static void emit_dummy_load(int pitch)
+{
+ int i;
+ uint32_t tile_flags = 0;
+
+ if (IS_965(devid)) {
+ pitch /= 4;
+ tile_flags = XY_SRC_COPY_BLT_SRC_TILED |
+ XY_SRC_COPY_BLT_DST_TILED;
+ }
+
+ for (i = 0; i < 10; i++) {
+ BLIT_COPY_BATCH_START(tile_flags);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ pitch);
+ OUT_BATCH(0 << 16 | 1024);
+ OUT_BATCH((2048) << 16 | (2048));
+ OUT_RELOC_FENCED(dummy_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH(pitch);
+ OUT_RELOC_FENCED(dummy_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+
+ if (batch->gen >= 6) {
+ BEGIN_BATCH(3, 0);
+ OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
+ }
+ intel_batchbuffer_flush(batch);
+}
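+
+/*
+ * emit_dummy_load() queues ten full 2048x2048 self-copies on the blitter,
+ * purely to keep the ring busy long enough that the relocation submitted
+ * right afterwards in reloc_and_emit() genuinely races with work the GPU is
+ * still executing.
+ */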
+
+static void reloc_and_emit(int fd, drm_intel_bo *target_bo, bool faulting_reloc)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 exec[2];
+ struct drm_i915_gem_relocation_entry reloc[1];
+ uint32_t handle_relocs;
+ void *gtt_relocs;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ memset(exec, 0, sizeof(exec));
+ memset(reloc, 0, sizeof(reloc));
+
+ exec[0].handle = target_bo->handle;
+
+ reloc[0].offset = special_reloc_ofs;
+ reloc[0].target_handle = target_bo->handle;
+ reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+ /* We do not track the last patched value, so force the relocation
+ * every time.
+ */
+ reloc[0].presumed_offset = -1;
+
+ handle_relocs = gem_create(fd, 4096);
+ gem_write(fd, handle_relocs, 0, reloc, sizeof(reloc));
+ gtt_relocs = gem_mmap__gtt(fd, handle_relocs, 4096,
+ PROT_READ | PROT_WRITE);
+
+ exec[1].handle = special_bo->handle;
+ exec[1].relocation_count = 1;
+ /* A newly mmapped GTT bo will fault on first access. */
+ if (faulting_reloc)
+ exec[1].relocs_ptr = to_user_pointer(gtt_relocs);
+ else
+ exec[1].relocs_ptr = to_user_pointer(reloc);
+
+ execbuf.buffers_ptr = to_user_pointer(exec);
+ execbuf.buffer_count = 2;
+ execbuf.batch_len = special_batch_len;
+ if (intel_gen(devid) >= 6)
+ execbuf.flags |= I915_EXEC_BLT;
+
+ gem_execbuf(fd, &execbuf);
+
+ gem_close(fd, handle_relocs);
+}
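+
+/*
+ * In the faulting_reloc case the relocation entry is handed to the kernel
+ * through a freshly created GTT mmap that has never been touched from the
+ * CPU, so the kernel's first access while copying the relocation takes a
+ * fault, pushing execbuf through its slow fault-handling path while the
+ * dummy load is still running.
+ */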
+
+static igt_hang_t no_hang(int fd)
+{
+ return (igt_hang_t){0};
+}
+
+static igt_hang_t bcs_hang(int fd)
+{
+ return igt_hang_ring(fd, I915_EXEC_BLT);
+}
+
+static void do_test(int fd, bool faulting_reloc,
+ igt_hang_t (*do_hang)(int fd))
+{
+ uint32_t tiling_mode = I915_TILING_X;
+ unsigned long pitch, act_size;
+ uint32_t test;
+ int i;
+
+ if (faulting_reloc)
+ igt_disable_prefault();
+
+ act_size = 2048;
+ dummy_bo = drm_intel_bo_alloc_tiled(bufmgr, "tiled dummy_bo", act_size, act_size,
+ 4, &tiling_mode, &pitch, 0);
+
+ drm_intel_bo_subdata(dummy_bo, 0, act_size*act_size*4, blob);
+
+ create_special_bo();
+
+ for (i = 0; i < NUM_TARGET_BOS; i++) {
+ igt_hang_t hang;
+
+ pc_target_bo[i] = drm_intel_bo_alloc(bufmgr, "special batch", 4096, 4096);
+ emit_dummy_load(pitch);
+ igt_assert(pc_target_bo[i]->offset == 0);
+ hang = do_hang(fd);
+
+ reloc_and_emit(fd, pc_target_bo[i], faulting_reloc);
+
+ igt_post_hang_ring(fd, hang);
+ }
+
+ /* Only check at the end to avoid unnecessary synchronous behaviour. */
+ for (i = 0; i < NUM_TARGET_BOS; i++) {
+ drm_intel_bo_get_subdata(pc_target_bo[i], 0, 4, &test);
+ igt_assert_f(test == 0xdeadbeef,
+ "mismatch in buffer %i: 0x%08x instead of 0xdeadbeef\n", i, test);
+ drm_intel_bo_unreference(pc_target_bo[i]);
+ }
+
+ drm_intel_gem_bo_map_gtt(dummy_bo);
+ drm_intel_gem_bo_unmap_gtt(dummy_bo);
+
+ drm_intel_bo_unreference(special_bo);
+ drm_intel_bo_unreference(dummy_bo);
+
+ if (faulting_reloc)
+ igt_enable_prefault();
+}
+
+#define INTERRUPT (1 << 0)
+#define FAULTING (1 << 1)
+#define THRASH (1 << 2)
+#define THRASH_INACTIVE (1 << 3)
+#define HANG (1 << 4)
+#define ALL_FLAGS (HANG | INTERRUPT | FAULTING | THRASH | THRASH_INACTIVE)
+static void do_forked_test(int fd, unsigned flags)
+{
+ int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
+ struct igt_helper_process thrasher = {};
+
+ if (flags & HANG)
+ igt_require_hang_ring(fd, I915_EXEC_BLT);
+
+ if (flags & (THRASH | THRASH_INACTIVE)) {
+ uint64_t val = (flags & THRASH_INACTIVE) ?
+ (DROP_RETIRE | DROP_BOUND | DROP_UNBOUND) : DROP_ALL;
+
+ igt_fork_helper(&thrasher) {
+ while (1) {
+ usleep(1000);
+ igt_drop_caches_set(fd, val);
+ }
+ }
+ }
+
+ igt_fork(i, num_threads * 4) {
+ /* re-create process local data */
+ fd = drm_open_driver(DRIVER_INTEL);
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+ if (flags & INTERRUPT)
+ igt_fork_signal_helper();
+
+ do_test(fd, flags & FAULTING, flags & HANG ? bcs_hang : no_hang);
+
+ if (flags & INTERRUPT)
+ igt_stop_signal_helper();
+ }
+
+ igt_waitchildren();
+ if (flags & (THRASH | THRASH_INACTIVE))
+ igt_stop_helper(&thrasher);
+}
+
+int fd;
+
+#define MAX_BLT_SIZE 128
+igt_main
+{
+ igt_skip_on_simulation();
+
+ memset(blob, 'A', sizeof(blob));
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ /* disable reuse, otherwise the test fails */
+ //drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ devid = intel_get_drm_devid(fd);
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+ }
+
+ igt_subtest("normal")
+ do_test(fd, false, no_hang);
+
+ igt_subtest("faulting-reloc")
+ do_test(fd, true, no_hang);
+
+ igt_fork_signal_helper();
+ igt_subtest("interruptible")
+ do_test(fd, false, no_hang);
+
+ igt_subtest("interruptible-hang")
+ do_test(fd, false, bcs_hang);
+
+ igt_subtest("faulting-reloc-interruptible")
+ do_test(fd, true, no_hang);
+
+ igt_subtest("faulting-reloc-interruptible-hang")
+ do_test(fd, true, bcs_hang);
+ igt_stop_signal_helper();
+
+ for (unsigned flags = 0; flags <= ALL_FLAGS; flags++) {
+ if ((flags & THRASH) && (flags & THRASH_INACTIVE))
+ continue;
+
+ igt_subtest_f("forked%s%s%s%s%s",
+ flags & INTERRUPT ? "-interruptible" : "",
+ flags & FAULTING ? "-faulting-reloc" : "",
+ flags & THRASH ? "-thrashing" : "",
+ flags & THRASH_INACTIVE ? "-thrash-inactive" : "",
+ flags & HANG ? "-hang": "")
+ do_forked_test(fd, flags);
+ }
+
+ igt_fixture {
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_render_copy.c b/tests/i915/gem_render_copy.c
new file mode 100644
index 00000000..17a66564
--- /dev/null
+++ b/tests/i915/gem_render_copy.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Damien Lespiau <damien.lespiau@intel.com>
+ */
+
+/*
+ * This file is a basic test for the render_copy() function, a very simple
+ * workload for the 3D engine.
+ */
+
+#include "igt.h"
+#include "igt_x86.h"
+#include <stdbool.h>
+#include <unistd.h>
+#include <cairo.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("Basic test for the render_copy() function.");
+
+#define WIDTH 512
+#define HEIGHT 512
+
+typedef struct {
+ int drm_fd;
+ uint32_t devid;
+ drm_intel_bufmgr *bufmgr;
+ struct intel_batchbuffer *batch;
+ igt_render_copyfunc_t render_copy;
+} data_t;
+static int opt_dump_png = false;
+static int check_all_pixels = false;
+
+static const char *make_filename(const char *filename)
+{
+ static char buf[64];
+
+ snprintf(buf, sizeof(buf), "%s_%s", igt_subtest_name(), filename);
+
+ return buf;
+}
+
+static void *linear_copy(data_t *data, struct igt_buf *buf)
+{
+ void *map, *linear;
+
+ igt_assert_eq(posix_memalign(&linear, 16, buf->bo->size), 0);
+
+ gem_set_domain(data->drm_fd, buf->bo->handle,
+ I915_GEM_DOMAIN_GTT, 0);
+
+ map = gem_mmap__gtt(data->drm_fd, buf->bo->handle,
+ buf->bo->size, PROT_READ);
+
+ igt_memcpy_from_wc(linear, map, buf->bo->size);
+
+ munmap(map, buf->bo->size);
+
+ return linear;
+}
+
+static void scratch_buf_write_to_png(data_t *data, struct igt_buf *buf,
+ const char *filename)
+{
+ cairo_surface_t *surface;
+ cairo_status_t ret;
+ void *linear;
+
+ linear = linear_copy(data, buf);
+
+ surface = cairo_image_surface_create_for_data(linear,
+ CAIRO_FORMAT_RGB24,
+ igt_buf_width(buf),
+ igt_buf_height(buf),
+ buf->stride);
+ ret = cairo_surface_write_to_png(surface, make_filename(filename));
+ igt_assert(ret == CAIRO_STATUS_SUCCESS);
+ cairo_surface_destroy(surface);
+
+ free(linear);
+}
+
+static int scratch_buf_aux_width(const struct igt_buf *buf)
+{
+ return DIV_ROUND_UP(igt_buf_width(buf), 1024) * 128;
+}
+
+static int scratch_buf_aux_height(const struct igt_buf *buf)
+{
+ return DIV_ROUND_UP(igt_buf_height(buf), 512) * 32;
+}
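+
+/*
+ * For the 512x512 surfaces used in this test the aux (CCS) plane works out
+ * as DIV_ROUND_UP(512, 1024) * 128 = 128 bytes wide and
+ * DIV_ROUND_UP(512, 512) * 32 = 32 rows tall, i.e. a single 4 KiB page.
+ */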
+
+static void *linear_copy_aux(data_t *data, struct igt_buf *buf)
+{
+ void *map, *linear;
+ int aux_size = scratch_buf_aux_width(buf) *
+ scratch_buf_aux_height(buf);
+
+ igt_assert_eq(posix_memalign(&linear, 16, aux_size), 0);
+
+ gem_set_domain(data->drm_fd, buf->bo->handle,
+ I915_GEM_DOMAIN_GTT, 0);
+
+ map = gem_mmap__gtt(data->drm_fd, buf->bo->handle,
+ buf->bo->size, PROT_READ);
+
+ igt_memcpy_from_wc(linear, map + buf->aux.offset, aux_size);
+
+ munmap(map, buf->bo->size);
+
+ return linear;
+}
+
+static void scratch_buf_aux_write_to_png(data_t *data,
+ struct igt_buf *buf,
+ const char *filename)
+{
+ cairo_surface_t *surface;
+ cairo_status_t ret;
+ void *linear;
+
+ linear = linear_copy_aux(data, buf);
+
+ surface = cairo_image_surface_create_for_data(linear,
+ CAIRO_FORMAT_A8,
+ scratch_buf_aux_width(buf),
+ scratch_buf_aux_height(buf),
+ buf->aux.stride);
+ ret = cairo_surface_write_to_png(surface, make_filename(filename));
+ igt_assert(ret == CAIRO_STATUS_SUCCESS);
+ cairo_surface_destroy(surface);
+
+ free(linear);
+}
+
+static void scratch_buf_draw_pattern(data_t *data, struct igt_buf *buf,
+ int x, int y, int w, int h,
+ int cx, int cy, int cw, int ch,
+ bool use_alternate_colors)
+{
+ cairo_surface_t *surface;
+ cairo_pattern_t *pat;
+ cairo_t *cr;
+ void *map, *linear;
+
+ linear = linear_copy(data, buf);
+
+ surface = cairo_image_surface_create_for_data(linear,
+ CAIRO_FORMAT_RGB24,
+ igt_buf_width(buf),
+ igt_buf_height(buf),
+ buf->stride);
+
+ cr = cairo_create(surface);
+
+ cairo_rectangle(cr, cx, cy, cw, ch);
+ cairo_clip(cr);
+
+ pat = cairo_pattern_create_mesh();
+ cairo_mesh_pattern_begin_patch(pat);
+ cairo_mesh_pattern_move_to(pat, x, y);
+ cairo_mesh_pattern_line_to(pat, x+w, y);
+ cairo_mesh_pattern_line_to(pat, x+w, y+h);
+ cairo_mesh_pattern_line_to(pat, x, y+h);
+ if (use_alternate_colors) {
+ cairo_mesh_pattern_set_corner_color_rgb(pat, 0, 0.0, 1.0, 1.0);
+ cairo_mesh_pattern_set_corner_color_rgb(pat, 1, 1.0, 0.0, 1.0);
+ cairo_mesh_pattern_set_corner_color_rgb(pat, 2, 1.0, 1.0, 0.0);
+ cairo_mesh_pattern_set_corner_color_rgb(pat, 3, 0.0, 0.0, 0.0);
+ } else {
+ cairo_mesh_pattern_set_corner_color_rgb(pat, 0, 1.0, 0.0, 0.0);
+ cairo_mesh_pattern_set_corner_color_rgb(pat, 1, 0.0, 1.0, 0.0);
+ cairo_mesh_pattern_set_corner_color_rgb(pat, 2, 0.0, 0.0, 1.0);
+ cairo_mesh_pattern_set_corner_color_rgb(pat, 3, 1.0, 1.0, 1.0);
+ }
+ cairo_mesh_pattern_end_patch(pat);
+
+ cairo_rectangle(cr, x, y, w, h);
+ cairo_set_source(cr, pat);
+ cairo_fill(cr);
+ cairo_pattern_destroy(pat);
+
+ cairo_destroy(cr);
+
+ cairo_surface_destroy(surface);
+
+ gem_set_domain(data->drm_fd, buf->bo->handle,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+ map = gem_mmap__gtt(data->drm_fd, buf->bo->handle,
+ buf->bo->size, PROT_READ | PROT_WRITE);
+
+ memcpy(map, linear, buf->bo->size);
+
+ munmap(map, buf->bo->size);
+
+ free(linear);
+}
+
+static void
+scratch_buf_copy(data_t *data,
+ struct igt_buf *src, int sx, int sy, int w, int h,
+ struct igt_buf *dst, int dx, int dy)
+{
+ int width = igt_buf_width(dst);
+ int height = igt_buf_height(dst);
+ uint32_t *linear_dst, *linear_src;
+
+ igt_assert_eq(igt_buf_width(dst), igt_buf_width(src));
+ igt_assert_eq(igt_buf_height(dst), igt_buf_height(src));
+ igt_assert_eq(dst->bo->size, src->bo->size);
+
+ gem_set_domain(data->drm_fd, dst->bo->handle,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ gem_set_domain(data->drm_fd, src->bo->handle,
+ I915_GEM_DOMAIN_GTT, 0);
+
+ linear_dst = gem_mmap__gtt(data->drm_fd, dst->bo->handle,
+ dst->bo->size, PROT_WRITE);
+ linear_src = gem_mmap__gtt(data->drm_fd, src->bo->handle,
+ src->bo->size, PROT_READ);
+
+ w = min(w, width - sx);
+ w = min(w, width - dx);
+
+ h = min(h, height - sy);
+ h = min(h, height - dy);
+
+ for (int y = 0; y < h; y++) {
+ igt_memcpy_from_wc(&linear_dst[(dy+y) * width + dx],
+ &linear_src[(sy+y) * width + sx],
+ w * 4);
+ }
+
+ munmap(linear_dst, dst->bo->size);
+ munmap(linear_src, src->bo->size);
+}
+
+static void scratch_buf_init(data_t *data, struct igt_buf *buf,
+ int width, int height,
+ uint32_t req_tiling, bool ccs)
+{
+ uint32_t tiling = req_tiling;
+ unsigned long pitch;
+
+ memset(buf, 0, sizeof(*buf));
+
+ if (ccs) {
+ int aux_width, aux_height;
+ int size;
+
+ igt_require(intel_gen(data->devid) >= 9);
+ igt_assert_eq(tiling, I915_TILING_Y);
+
+ buf->stride = ALIGN(width * 4, 128);
+ buf->size = buf->stride * height;
+ buf->tiling = tiling;
+
+ aux_width = scratch_buf_aux_width(buf);
+ aux_height = scratch_buf_aux_height(buf);
+
+ buf->aux.offset = buf->stride * ALIGN(height, 32);
+ buf->aux.stride = aux_width;
+
+ size = buf->aux.offset + aux_width * aux_height;
+
+ buf->bo = drm_intel_bo_alloc(data->bufmgr, "", size, 4096);
+
+ drm_intel_bo_set_tiling(buf->bo, &tiling, buf->stride);
+ igt_assert_eq(tiling, req_tiling);
+ } else {
+ buf->bo = drm_intel_bo_alloc_tiled(data->bufmgr, "",
+ width, height, 4,
+ &tiling, &pitch, 0);
+ igt_assert_eq(tiling, req_tiling);
+
+ buf->stride = pitch;
+ buf->tiling = tiling;
+ buf->size = pitch * height;
+ }
+
+ igt_assert(igt_buf_width(buf) == width);
+ igt_assert(igt_buf_height(buf) == height);
+}
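+
+/*
+ * Worked example for the CCS path with WIDTH = HEIGHT = 512: the main
+ * surface stride is ALIGN(512 * 4, 128) = 2048 bytes, the aux plane starts
+ * at 2048 * ALIGN(512, 32) = 1 MiB, and its 128 * 32 bytes bring the bo to
+ * 1 MiB + 4 KiB in total.
+ */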
+
+static void
+scratch_buf_check(data_t *data,
+ struct igt_buf *buf,
+ struct igt_buf *ref,
+ int x, int y)
+{
+ int width = igt_buf_width(buf);
+ uint32_t buf_val, ref_val;
+ uint32_t *linear;
+
+ igt_assert_eq(igt_buf_width(buf), igt_buf_width(ref));
+ igt_assert_eq(igt_buf_height(buf), igt_buf_height(ref));
+ igt_assert_eq(buf->bo->size, ref->bo->size);
+
+ linear = linear_copy(data, buf);
+ buf_val = linear[y * width + x];
+ free(linear);
+
+ linear = linear_copy(data, ref);
+ ref_val = linear[y * width + x];
+ free(linear);
+
+ igt_assert_f(buf_val == ref_val,
+ "Expected 0x%08x, found 0x%08x at (%d,%d)\n",
+ ref_val, buf_val, x, y);
+}
+
+static void
+scratch_buf_check_all(data_t *data,
+ struct igt_buf *buf,
+ struct igt_buf *ref)
+{
+ int width = igt_buf_width(buf);
+ int height = igt_buf_height(buf);
+ uint32_t *linear_buf, *linear_ref;
+
+ igt_assert_eq(igt_buf_width(buf), igt_buf_width(ref));
+ igt_assert_eq(igt_buf_height(buf), igt_buf_height(ref));
+ igt_assert_eq(buf->bo->size, ref->bo->size);
+
+ linear_buf = linear_copy(data, buf);
+ linear_ref = linear_copy(data, ref);
+
+ for (int y = 0; y < height; y++) {
+ for (int x = 0; x < width; x++) {
+ uint32_t buf_val = linear_buf[y * width + x];
+ uint32_t ref_val = linear_ref[y * width + x];
+
+ igt_assert_f(buf_val == ref_val,
+ "Expected 0x%08x, found 0x%08x at (%d,%d)\n",
+ ref_val, buf_val, x, y);
+ }
+ }
+
+ free(linear_ref);
+ free(linear_buf);
+}
+
+static void scratch_buf_aux_check(data_t *data,
+ struct igt_buf *buf)
+{
+ int aux_size = scratch_buf_aux_width(buf) *
+ scratch_buf_aux_height(buf);
+ uint8_t *linear;
+ int i;
+
+ linear = linear_copy_aux(data, buf);
+
+ for (i = 0; i < aux_size; i++) {
+ if (linear[i])
+ break;
+ }
+
+ free(linear);
+
+ igt_assert_f(i < aux_size,
+ "Aux surface indicates that nothing was compressed\n");
+}
+
+static void test(data_t *data, uint32_t tiling, bool test_ccs)
+{
+ struct igt_buf dst, ccs, ref;
+ struct {
+ struct igt_buf buf;
+ const char *filename;
+ uint32_t tiling;
+ int x, y;
+ } src[3] = {
+ {
+ .filename = "source-linear.png",
+ .tiling = I915_TILING_NONE,
+ .x = 1, .y = HEIGHT/2+1,
+ },
+ {
+ .filename = "source-x-tiled.png",
+ .tiling = I915_TILING_X,
+ .x = WIDTH/2+1, .y = HEIGHT/2+1,
+ },
+ {
+ .filename = "source-y-tiled.png",
+ .tiling = I915_TILING_Y,
+ .x = WIDTH/2+1, .y = 1,
+ },
+ };
+
+ int opt_dump_aub = igt_aub_dump_enabled();
+
+ for (int i = 0; i < ARRAY_SIZE(src); i++)
+ scratch_buf_init(data, &src[i].buf, WIDTH, HEIGHT, src[i].tiling, false);
+ scratch_buf_init(data, &dst, WIDTH, HEIGHT, tiling, false);
+ if (test_ccs)
+ scratch_buf_init(data, &ccs, WIDTH, HEIGHT, I915_TILING_Y, true);
+ scratch_buf_init(data, &ref, WIDTH, HEIGHT, I915_TILING_NONE, false);
+
+ for (int i = 0; i < ARRAY_SIZE(src); i++)
+ scratch_buf_draw_pattern(data, &src[i].buf,
+ 0, 0, WIDTH, HEIGHT,
+ 0, 0, WIDTH, HEIGHT, true);
+ scratch_buf_draw_pattern(data, &dst,
+ 0, 0, WIDTH, HEIGHT,
+ 0, 0, WIDTH, HEIGHT, false);
+
+ scratch_buf_copy(data,
+ &dst, 0, 0, WIDTH, HEIGHT,
+ &ref, 0, 0);
+ for (int i = 0; i < ARRAY_SIZE(src); i++)
+ scratch_buf_copy(data,
+ &src[i].buf, WIDTH/4, HEIGHT/4, WIDTH/2-2, HEIGHT/2-2,
+ &ref, src[i].x, src[i].y);
+
+ if (opt_dump_png) {
+ for (int i = 0; i < ARRAY_SIZE(src); i++)
+ scratch_buf_write_to_png(data, &src[i].buf, src[i].filename);
+ scratch_buf_write_to_png(data, &dst, "destination.png");
+ scratch_buf_write_to_png(data, &ref, "reference.png");
+ }
+
+ if (opt_dump_aub) {
+ drm_intel_bufmgr_gem_set_aub_filename(data->bufmgr,
+ "rendercopy.aub");
+ drm_intel_bufmgr_gem_set_aub_dump(data->bufmgr, true);
+ }
+
+ /* This will copy the src to the mid point of the dst buffer. Presumably
+ * the out of bounds accesses will get clipped.
+ * Resulting buffer should look like:
+ * _______
+ * |dst|dst|
+ * |dst|src|
+ * -------
+ */
+ if (test_ccs)
+ data->render_copy(data->batch, NULL,
+ &dst, 0, 0, WIDTH, HEIGHT,
+ &ccs, 0, 0);
+
+ for (int i = 0; i < ARRAY_SIZE(src); i++)
+ data->render_copy(data->batch, NULL,
+ &src[i].buf, WIDTH/4, HEIGHT/4, WIDTH/2-2, HEIGHT/2-2,
+ test_ccs ? &ccs : &dst, src[i].x, src[i].y);
+
+ if (test_ccs)
+ data->render_copy(data->batch, NULL,
+ &ccs, 0, 0, WIDTH, HEIGHT,
+ &dst, 0, 0);
+
+ if (opt_dump_png){
+ scratch_buf_write_to_png(data, &dst, "result.png");
+ if (test_ccs) {
+ scratch_buf_write_to_png(data, &ccs, "compressed.png");
+ scratch_buf_aux_write_to_png(data, &ccs, "compressed-aux.png");
+ }
+ }
+
+ if (opt_dump_aub) {
+ drm_intel_gem_bo_aub_dump_bmp(dst.bo,
+ 0, 0, igt_buf_width(&dst),
+ igt_buf_height(&dst),
+ AUB_DUMP_BMP_FORMAT_ARGB_8888,
+ dst.stride, 0);
+ drm_intel_bufmgr_gem_set_aub_dump(data->bufmgr, false);
+ } else if (check_all_pixels) {
+ scratch_buf_check_all(data, &dst, &ref);
+ } else {
+ scratch_buf_check(data, &dst, &ref, 10, 10);
+ scratch_buf_check(data, &dst, &ref, WIDTH - 10, HEIGHT - 10);
+ }
+
+ if (test_ccs)
+ scratch_buf_aux_check(data, &ccs);
+}
+
+static int opt_handler(int opt, int opt_index, void *data)
+{
+ if (opt == 'd') {
+ opt_dump_png = true;
+ }
+
+ if (opt == 'a') {
+ check_all_pixels = true;
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ data_t data = {0, };
+
+ igt_subtest_init_parse_opts(&argc, argv, "da", NULL, NULL,
+ opt_handler, NULL);
+
+ igt_fixture {
+ data.drm_fd = drm_open_driver_render(DRIVER_INTEL);
+ data.devid = intel_get_drm_devid(data.drm_fd);
+ igt_require_gem(data.drm_fd);
+
+ data.bufmgr = drm_intel_bufmgr_gem_init(data.drm_fd, 4096);
+ igt_assert(data.bufmgr);
+
+ data.render_copy = igt_get_render_copyfunc(data.devid);
+ igt_require_f(data.render_copy,
+ "no render-copy function\n");
+
+ data.batch = intel_batchbuffer_alloc(data.bufmgr, data.devid);
+ igt_assert(data.batch);
+ }
+
+ igt_subtest("linear")
+ test(&data, I915_TILING_NONE, false);
+ igt_subtest("x-tiled")
+ test(&data, I915_TILING_X, false);
+ igt_subtest("y-tiled")
+ test(&data, I915_TILING_Y, false);
+
+ igt_subtest("y-tiled-ccs-to-linear")
+ test(&data, I915_TILING_NONE, true);
+ igt_subtest("y-tiled-ccs-to-x-tiled")
+ test(&data, I915_TILING_X, true);
+ igt_subtest("y-tiled-ccs-to-y-tiled")
+ test(&data, I915_TILING_Y, true);
+
+ igt_fixture {
+ intel_batchbuffer_free(data.batch);
+ drm_intel_bufmgr_destroy(data.bufmgr);
+ }
+
+ igt_exit();
+}
diff --git a/tests/i915/gem_render_copy_redux.c b/tests/i915/gem_render_copy_redux.c
new file mode 100644
index 00000000..a861862d
--- /dev/null
+++ b/tests/i915/gem_render_copy_redux.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright © 2013-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Damien Lespiau <damien.lespiau@intel.com>
+ */
+
+/*
+ * This file is an "advanced" test for the render_copy() function, a very simple
+ * workload for the 3D engine. The basic test in gem_render_copy.c is intentionally
+ * kept extremely simple to allow for aub instrumentation and to ease debugging of
+ * the render copy functions themselves. This test, on the other hand, aims to stress
+ * the execbuffer interface with a simple render workload.
+ */
+
+#include "igt.h"
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("Advanced test for the render_copy() function.");
+
+#define WIDTH 512
+#define STRIDE (WIDTH*4)
+#define HEIGHT 512
+#define SIZE (HEIGHT*STRIDE)
+
+#define SRC_COLOR 0xffff00ff
+#define DST_COLOR 0xfff0ff00
+
+typedef struct {
+ int fd;
+ uint32_t devid;
+ drm_intel_bufmgr *bufmgr;
+ struct intel_batchbuffer *batch;
+ igt_render_copyfunc_t render_copy;
+ uint32_t linear[WIDTH * HEIGHT];
+} data_t;
+
+static void data_init(data_t *data)
+{
+ data->fd = drm_open_driver(DRIVER_INTEL);
+ data->devid = intel_get_drm_devid(data->fd);
+
+ data->bufmgr = drm_intel_bufmgr_gem_init(data->fd, 4096);
+ igt_assert(data->bufmgr);
+
+ data->render_copy = igt_get_render_copyfunc(data->devid);
+ igt_require_f(data->render_copy,
+ "no render-copy function\n");
+
+ data->batch = intel_batchbuffer_alloc(data->bufmgr, data->devid);
+ igt_assert(data->batch);
+}
+
+static void data_fini(data_t *data)
+{
+ intel_batchbuffer_free(data->batch);
+ drm_intel_bufmgr_destroy(data->bufmgr);
+ close(data->fd);
+}
+
+static void scratch_buf_init(data_t *data, struct igt_buf *buf,
+ int width, int height, int stride, uint32_t color)
+{
+ drm_intel_bo *bo;
+ int i;
+
+ bo = drm_intel_bo_alloc(data->bufmgr, "", SIZE, 4096);
+ for (i = 0; i < width * height; i++)
+ data->linear[i] = color;
+ gem_write(data->fd, bo->handle, 0, data->linear,
+ sizeof(data->linear));
+
+ memset(buf, 0, sizeof(*buf));
+
+ buf->bo = bo;
+ buf->stride = stride;
+ buf->tiling = I915_TILING_NONE;
+ buf->size = SIZE;
+}
+
+static void scratch_buf_fini(data_t *data, struct igt_buf *buf)
+{
+ dri_bo_unreference(buf->bo);
+ memset(buf, 0, sizeof(*buf));
+}
+
+static void
+scratch_buf_check(data_t *data, struct igt_buf *buf, int x, int y,
+ uint32_t color)
+{
+ uint32_t val;
+
+ gem_read(data->fd, buf->bo->handle, 0,
+ data->linear, sizeof(data->linear));
+ val = data->linear[y * WIDTH + x];
+ igt_assert_f(val == color,
+ "Expected 0x%08x, found 0x%08x at (%d,%d)\n",
+ color, val, x, y);
+}
+
+static void copy(data_t *data)
+{
+ struct igt_buf src, dst;
+
+ scratch_buf_init(data, &src, WIDTH, HEIGHT, STRIDE, SRC_COLOR);
+ scratch_buf_init(data, &dst, WIDTH, HEIGHT, STRIDE, DST_COLOR);
+
+ scratch_buf_check(data, &src, WIDTH / 2, HEIGHT / 2, SRC_COLOR);
+ scratch_buf_check(data, &dst, WIDTH / 2, HEIGHT / 2, DST_COLOR);
+
+ data->render_copy(data->batch, NULL,
+ &src, 0, 0, WIDTH, HEIGHT,
+ &dst, WIDTH / 2, HEIGHT / 2);
+
+ scratch_buf_check(data, &dst, 10, 10, DST_COLOR);
+ scratch_buf_check(data, &dst, WIDTH - 10, HEIGHT - 10, SRC_COLOR);
+
+ scratch_buf_fini(data, &src);
+ scratch_buf_fini(data, &dst);
+}
+
+static void copy_flink(data_t *data)
+{
+ data_t local;
+ struct igt_buf src, dst;
+ struct igt_buf local_src, local_dst;
+ struct igt_buf flink;
+ uint32_t name;
+
+ data_init(&local);
+
+ scratch_buf_init(data, &src, WIDTH, HEIGHT, STRIDE, 0);
+ scratch_buf_init(data, &dst, WIDTH, HEIGHT, STRIDE, DST_COLOR);
+
+ data->render_copy(data->batch, NULL,
+ &src, 0, 0, WIDTH, HEIGHT,
+ &dst, WIDTH, HEIGHT);
+
+ scratch_buf_init(&local, &local_src, WIDTH, HEIGHT, STRIDE, 0);
+ scratch_buf_init(&local, &local_dst, WIDTH, HEIGHT, STRIDE, SRC_COLOR);
+
+ local.render_copy(local.batch, NULL,
+ &local_src, 0, 0, WIDTH, HEIGHT,
+ &local_dst, WIDTH, HEIGHT);
+
+
+ drm_intel_bo_flink(local_dst.bo, &name);
+ flink = local_dst;
+ flink.bo = drm_intel_bo_gem_create_from_name(data->bufmgr, "flink", name);
+
+ data->render_copy(data->batch, NULL,
+ &flink, 0, 0, WIDTH, HEIGHT,
+ &dst, WIDTH / 2, HEIGHT / 2);
+
+ scratch_buf_check(data, &dst, 10, 10, DST_COLOR);
+ scratch_buf_check(data, &dst, WIDTH - 10, HEIGHT - 10, SRC_COLOR);
+
+ scratch_buf_fini(data, &src);
+ scratch_buf_fini(data, &flink);
+ scratch_buf_fini(data, &dst);
+
+ scratch_buf_fini(&local, &local_src);
+ scratch_buf_fini(&local, &local_dst);
+
+ data_fini(&local);
+}
+
+int main(int argc, char **argv)
+{
+ data_t data = {0, };
+
+ igt_subtest_init(argc, argv);
+
+ igt_fixture {
+ data_init(&data);
+ igt_require_gem(data.fd);
+ }
+
+ igt_subtest("normal") {
+ int loop = 100;
+ while (loop--)
+ copy(&data);
+ }
+
+ igt_subtest("interruptible") {
+ int loop = 100;
+ igt_fork_signal_helper();
+ while (loop--)
+ copy(&data);
+ igt_stop_signal_helper();
+ }
+
+ igt_subtest("flink") {
+ int loop = 100;
+ while (loop--)
+ copy_flink(&data);
+ }
+
+ igt_subtest("flink-interruptible") {
+ int loop = 100;
+ igt_fork_signal_helper();
+ while (loop--)
+ copy_flink(&data);
+ igt_stop_signal_helper();
+ }
+
+ igt_exit();
+}
diff --git a/tests/i915/gem_render_linear_blits.c b/tests/i915/gem_render_linear_blits.c
new file mode 100644
index 00000000..a1a7e033
--- /dev/null
+++ b/tests/i915/gem_render_linear_blits.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/** @file gem_render_linear_blits.c
+ *
+ * This is a test of doing many blits, with a working set
+ * larger than the aperture size.
+ *
+ * The goal is to simply ensure the basics work.
+ */
+
+#include "config.h"
+
+#include "igt.h"
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "intel_bufmgr.h"
+
+#define WIDTH 512
+#define STRIDE (WIDTH*4)
+#define HEIGHT 512
+#define SIZE (HEIGHT*STRIDE)
+
+static uint32_t linear[WIDTH*HEIGHT];
+static igt_render_copyfunc_t render_copy;
+
+static void
+check_bo(int fd, uint32_t handle, uint32_t val)
+{
+ int i;
+
+ gem_read(fd, handle, 0, linear, sizeof(linear));
+ for (i = 0; i < WIDTH*HEIGHT; i++) {
+ igt_assert_f(linear[i] == val,
+ "Expected 0x%08x, found 0x%08x "
+ "at offset 0x%08x\n",
+ val, linear[i], i * 4);
+ val++;
+ }
+}
+
+static void run_test (int fd, int count)
+{
+ drm_intel_bufmgr *bufmgr;
+ struct intel_batchbuffer *batch;
+ uint32_t *start_val;
+ drm_intel_bo **bo;
+ uint32_t start = 0;
+ int i, j;
+
+ render_copy = igt_get_render_copyfunc(intel_get_drm_devid(fd));
+ igt_require(render_copy);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+ bo = malloc(sizeof(*bo)*count);
+ start_val = malloc(sizeof(*start_val)*count);
+
+ for (i = 0; i < count; i++) {
+ bo[i] = drm_intel_bo_alloc(bufmgr, "", SIZE, 4096);
+ start_val[i] = start;
+ for (j = 0; j < WIDTH*HEIGHT; j++)
+ linear[j] = start++;
+ gem_write(fd, bo[i]->handle, 0, linear, sizeof(linear));
+ }
+
+ igt_info("Verifying initialisation - %d buffers of %d bytes\n", count, SIZE);
+ for (i = 0; i < count; i++)
+ check_bo(fd, bo[i]->handle, start_val[i]);
+
+ igt_info("Cyclic blits, forward...\n");
+ for (i = 0; i < count * 4; i++) {
+ struct igt_buf src = {}, dst = {};
+
+ src.bo = bo[i % count];
+ src.stride = STRIDE;
+ src.tiling = I915_TILING_NONE;
+ src.size = SIZE;
+
+ dst.bo = bo[(i + 1) % count];
+ dst.stride = STRIDE;
+ dst.tiling = I915_TILING_NONE;
+ dst.size = SIZE;
+
+ render_copy(batch, NULL, &src, 0, 0, WIDTH, HEIGHT, &dst, 0, 0);
+ start_val[(i + 1) % count] = start_val[i % count];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(fd, bo[i]->handle, start_val[i]);
+
+ if (igt_run_in_simulation())
+ return;
+
+ igt_info("Cyclic blits, backward...\n");
+ for (i = 0; i < count * 4; i++) {
+ struct igt_buf src = {}, dst = {};
+
+ src.bo = bo[(i + 1) % count];
+ src.stride = STRIDE;
+ src.tiling = I915_TILING_NONE;
+ src.size = SIZE;
+
+ dst.bo = bo[i % count];
+ dst.stride = STRIDE;
+ dst.tiling = I915_TILING_NONE;
+ dst.size = SIZE;
+
+ render_copy(batch, NULL, &src, 0, 0, WIDTH, HEIGHT, &dst, 0, 0);
+ start_val[i % count] = start_val[(i + 1) % count];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(fd, bo[i]->handle, start_val[i]);
+
+ igt_info("Random blits...\n");
+ for (i = 0; i < count * 4; i++) {
+ struct igt_buf src = {}, dst = {};
+ int s = random() % count;
+ int d = random() % count;
+
+ if (s == d)
+ continue;
+
+ src.bo = bo[s];
+ src.stride = STRIDE;
+ src.tiling = I915_TILING_NONE;
+ src.size = SIZE;
+
+ dst.bo = bo[d];
+ dst.stride = STRIDE;
+ dst.tiling = I915_TILING_NONE;
+ dst.size = SIZE;
+
+ render_copy(batch, NULL, &src, 0, 0, WIDTH, HEIGHT, &dst, 0, 0);
+ start_val[d] = start_val[s];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(fd, bo[i]->handle, start_val[i]);
+
+ /* release resources */
+ for (i = 0; i < count; i++) {
+ drm_intel_bo_unreference(bo[i]);
+ }
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+}
+
+igt_main
+{
+ static int fd = 0;
+ int count=0;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ }
+
+ igt_subtest("basic") {
+ run_test(fd, 2);
+ }
+
+ /* the rest of the tests are too long for simulation */
+ igt_skip_on_simulation();
+
+ igt_subtest("aperture-thrash") {
+ count = 3 * gem_aperture_size(fd) / SIZE / 2;
+ intel_require_memory(count, SIZE, CHECK_RAM);
+ run_test(fd, count);
+ }
+
+ igt_subtest("aperture-shrink") {
+ igt_fork_shrink_helper(fd);
+
+ count = 3 * gem_aperture_size(fd) / SIZE / 2;
+ intel_require_memory(count, SIZE, CHECK_RAM);
+ run_test(fd, count);
+
+ igt_stop_shrink_helper();
+ }
+
+ igt_subtest("swap-thrash") {
+ uint64_t swap_mb = intel_get_total_swap_mb();
+ igt_require(swap_mb > 0);
+ count = ((intel_get_avail_ram_mb() + (swap_mb / 2)) * 1024*1024) / SIZE;
+ intel_require_memory(count, SIZE, CHECK_RAM | CHECK_SWAP);
+ run_test(fd, count);
+ }
+}
diff --git a/tests/i915/gem_render_tiled_blits.c b/tests/i915/gem_render_tiled_blits.c
new file mode 100644
index 00000000..3484d561
--- /dev/null
+++ b/tests/i915/gem_render_tiled_blits.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/** @file gem_render_tiled_blits.c
+ *
+ * This is a test of doing many blits between tiled buffers, with a working set
+ * larger than the aperture size.
+ *
+ * The goal is to simply ensure the basics work.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "intel_bufmgr.h"
+
+#define WIDTH 512
+#define STRIDE (WIDTH*4)
+#define HEIGHT 512
+#define SIZE (HEIGHT*STRIDE)
+
+static igt_render_copyfunc_t render_copy;
+static drm_intel_bo *linear;
+static uint32_t data[WIDTH*HEIGHT];
+static int snoop;
+
+static void
+check_bo(struct intel_batchbuffer *batch, struct igt_buf *buf, uint32_t val)
+{
+ struct igt_buf tmp = {};
+ uint32_t *ptr;
+ int i;
+
+ tmp.bo = linear;
+ tmp.stride = STRIDE;
+ tmp.tiling = I915_TILING_NONE;
+ tmp.size = SIZE;
+
+ render_copy(batch, NULL, buf, 0, 0, WIDTH, HEIGHT, &tmp, 0, 0);
+ if (snoop) {
+ do_or_die(drm_intel_bo_map(linear, 0));
+ ptr = linear->virtual;
+ } else {
+ do_or_die(drm_intel_bo_get_subdata(linear, 0, sizeof(data), data));
+ ptr = data;
+ }
+ for (i = 0; i < WIDTH*HEIGHT; i++) {
+ igt_assert_f(ptr[i] == val,
+ "Expected 0x%08x, found 0x%08x "
+ "at offset 0x%08x\n",
+ val, ptr[i], i * 4);
+ val++;
+ }
+ if (ptr != data)
+ drm_intel_bo_unmap(linear);
+}
+
+static void run_test (int fd, int count)
+{
+ drm_intel_bufmgr *bufmgr;
+ struct intel_batchbuffer *batch;
+ uint32_t *start_val;
+ struct igt_buf *buf;
+ uint32_t start = 0;
+ int i, j;
+ uint32_t devid;
+
+ devid = intel_get_drm_devid(fd);
+
+ render_copy = igt_get_render_copyfunc(devid);
+ igt_require(render_copy);
+
+ snoop = 1;
+ if (IS_GEN2(devid)) /* chipset only handles cached -> uncached */
+ snoop = 0;
+ if (IS_BROADWATER(devid) || IS_CRESTLINE(devid)) /* snafu */
+ snoop = 0;
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ drm_intel_bufmgr_gem_set_vma_cache_size(bufmgr, 32);
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+ linear = drm_intel_bo_alloc(bufmgr, "linear", WIDTH*HEIGHT*4, 0);
+ if (snoop) {
+ gem_set_caching(fd, linear->handle, 1);
+ igt_info("Using a snoop linear buffer for comparisons\n");
+ }
+
+ buf = malloc(sizeof(*buf)*count);
+ start_val = malloc(sizeof(*start_val)*count);
+
+ for (i = 0; i < count; i++) {
+ uint32_t tiling = I915_TILING_X + (random() & 1);
+ unsigned long pitch = STRIDE;
+ uint32_t *ptr;
+
+ buf[i].bo = drm_intel_bo_alloc_tiled(bufmgr, "",
+ WIDTH, HEIGHT, 4,
+ &tiling, &pitch, 0);
+ buf[i].stride = pitch;
+ buf[i].tiling = tiling;
+ buf[i].size = SIZE;
+
+ start_val[i] = start;
+
+ do_or_die(drm_intel_gem_bo_map_gtt(buf[i].bo));
+ ptr = buf[i].bo->virtual;
+ for (j = 0; j < WIDTH*HEIGHT; j++)
+ ptr[j] = start++;
+ drm_intel_gem_bo_unmap_gtt(buf[i].bo);
+ }
+
+ igt_info("Verifying initialisation...\n");
+ for (i = 0; i < count; i++)
+ check_bo(batch, &buf[i], start_val[i]);
+
+ igt_info("Cyclic blits, forward...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = i % count;
+ int dst = (i + 1) % count;
+
+ render_copy(batch, NULL, buf+src, 0, 0, WIDTH, HEIGHT, buf+dst, 0, 0);
+ start_val[dst] = start_val[src];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(batch, &buf[i], start_val[i]);
+
+ igt_info("Cyclic blits, backward...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = (i + 1) % count;
+ int dst = i % count;
+
+ render_copy(batch, NULL, buf+src, 0, 0, WIDTH, HEIGHT, buf+dst, 0, 0);
+ start_val[dst] = start_val[src];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(batch, &buf[i], start_val[i]);
+
+ igt_info("Random blits...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = random() % count;
+ int dst = random() % count;
+
+ if (src == dst)
+ continue;
+
+ render_copy(batch, NULL, buf+src, 0, 0, WIDTH, HEIGHT, buf+dst, 0, 0);
+ start_val[dst] = start_val[src];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(batch, &buf[i], start_val[i]);
+
+ /* release resources */
+ drm_intel_bo_unreference(linear);
+ for (i = 0; i < count; i++) {
+ drm_intel_bo_unreference(buf[i].bo);
+ }
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+}
+
+
+igt_main
+{
+ int fd = 0;
+ int count = 0;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ }
+
+ igt_subtest("basic") {
+ run_test(fd, 2);
+ }
+
+ /* the rest of the tests are too long for simulation */
+ igt_skip_on_simulation();
+
+ igt_subtest("aperture-thrash") {
+ count = 3 * gem_aperture_size(fd) / SIZE / 2;
+ intel_require_memory(count, SIZE, CHECK_RAM);
+ run_test(fd, count);
+ }
+
+ igt_subtest("aperture-shrink") {
+ igt_fork_shrink_helper(fd);
+
+ count = 3 * gem_aperture_size(fd) / SIZE / 2;
+ intel_require_memory(count, SIZE, CHECK_RAM);
+ run_test(fd, count);
+
+ igt_stop_shrink_helper();
+ }
+
+ igt_subtest("swap-thrash") {
+ uint64_t swap_mb = intel_get_total_swap_mb();
+ igt_require(swap_mb > 0);
+ count = ((intel_get_avail_ram_mb() + (swap_mb / 2)) * 1024*1024) / SIZE;
+ intel_require_memory(count, SIZE, CHECK_RAM | CHECK_SWAP);
+ run_test(fd, count);
+ }
+}
diff --git a/tests/i915/gem_request_retire.c b/tests/i915/gem_request_retire.c
new file mode 100644
index 00000000..ea1c7327
--- /dev/null
+++ b/tests/i915/gem_request_retire.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+ *
+ */
+
+/** @file gem_request_retire.c
+ *
+ * Collection of tests targeting request retirement code paths.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <pthread.h>
+#include <time.h>
+
+#include "drm.h"
+#include "i915_drm.h"
+
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("Collection of tests targeting request retirement code"
+ " paths.");
+
+#define WIDTH 4096
+#define HEIGHT 4096
+#define BO_SIZE (WIDTH * HEIGHT * sizeof(uint32_t))
+
+static uint32_t
+blit(int fd, uint32_t dst, uint32_t src, uint32_t ctx_id)
+{
+ const unsigned int copies = 1000;
+ uint32_t batch[12 * copies + 5];
+ struct drm_i915_gem_relocation_entry reloc[2 * copies];
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_execbuffer2 exec;
+ uint32_t handle;
+ unsigned int i = 0, j, r = 0;
+
+ for (j = 0; j < copies; j++) {
+ reloc[r].target_handle = dst;
+ reloc[r].delta = 0;
+ reloc[r].offset = (i + 4) * sizeof(uint32_t);
+ reloc[r].presumed_offset = 0;
+ reloc[r].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[r].write_domain = I915_GEM_DOMAIN_RENDER;
+
+ r++;
+
+ reloc[r].target_handle = src;
+ reloc[r].delta = 0;
+ reloc[r].offset = (i + 7) * sizeof(uint32_t);
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ reloc[r].offset += sizeof(uint32_t);
+ reloc[r].presumed_offset = 0;
+ reloc[r].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[r].write_domain = 0;
+
+ r++;
+
+ batch[i++] = XY_SRC_COPY_BLT_CMD |
+ XY_SRC_COPY_BLT_WRITE_ALPHA |
+ XY_SRC_COPY_BLT_WRITE_RGB;
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i - 1] |= 8;
+ else
+ batch[i - 1] |= 6;
+
+ batch[i++] = (3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ WIDTH*4;
+ batch[i++] = 0; /* dst x1,y1 */
+ batch[i++] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
+ batch[i++] = 0; /* dst reloc */
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i++] = 0;
+ batch[i++] = 0; /* src x1,y1 */
+ batch[i++] = WIDTH*4;
+ batch[i++] = 0; /* src reloc */
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i++] = 0;
+ }
+
+ batch[i++] = MI_BATCH_BUFFER_END;
+
+ while (i % 4)
+ batch[i++] = MI_NOOP;
+
+ handle = gem_create(fd, sizeof(batch));
+ gem_write(fd, handle, 0, batch, sizeof(batch));
+
+ memset(obj, 0, sizeof(obj));
+ memset(&exec, 0, sizeof(exec));
+
+ obj[exec.buffer_count++].handle = dst;
+ if (src != dst)
+ obj[exec.buffer_count++].handle = src;
+ obj[exec.buffer_count].handle = handle;
+ obj[exec.buffer_count].relocation_count = 2 * copies;
+ obj[exec.buffer_count].relocs_ptr = to_user_pointer(reloc);
+ exec.buffer_count++;
+ exec.buffers_ptr = to_user_pointer(obj);
+
+ exec.batch_len = i * sizeof(uint32_t);
+ exec.flags = I915_EXEC_BLT;
+ i915_execbuffer2_set_context_id(exec, ctx_id);
+
+ gem_execbuf(fd, &exec);
+
+ return handle;
+}
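+
+/*
+ * Sizing note: each copy above emits at most 10 dwords (two extra on gen8+
+ * for the 64-bit relocation addresses), so 1000 copies plus the terminator
+ * fit comfortably in the batch[12 * copies + 5] array.  The point of so many
+ * back-to-back copies is simply a long-running blit that is still busy when
+ * the tiny noop batch has long since completed.
+ */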
+
+static uint32_t
+noop(int fd, uint32_t src, uint32_t ctx_id)
+{
+ uint32_t batch[4];
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_execbuffer2 exec;
+ uint32_t handle;
+ unsigned int i = 0;
+
+ batch[i++] = MI_NOOP;
+ batch[i++] = MI_BATCH_BUFFER_END;
+ batch[i++] = MI_NOOP;
+ batch[i++] = MI_NOOP;
+
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, batch, sizeof(batch));
+
+ memset(obj, 0, sizeof(obj));
+ memset(&exec, 0, sizeof(exec));
+
+ obj[exec.buffer_count++].handle = src;
+ obj[exec.buffer_count].handle = handle;
+ obj[exec.buffer_count].relocation_count = 0;
+ obj[exec.buffer_count].relocs_ptr = to_user_pointer(0);
+ exec.buffer_count++;
+ exec.buffers_ptr = to_user_pointer(obj);
+
+ exec.batch_len = i * sizeof(uint32_t);
+ exec.flags = I915_EXEC_RENDER;
+ i915_execbuffer2_set_context_id(exec, ctx_id);
+
+ gem_execbuf(fd, &exec);
+
+ return handle;
+}
+
+/*
+ * A single bo is operated on by batch buffers submitted from two contexts
+ * and on different rings.
+ * One execbuf finishes well ahead of the other, at which point the context
+ * it ran in is destroyed.
+ */
+static void
+test_retire_vma_not_inactive(int fd)
+{
+ uint32_t ctx_id;
+ uint32_t src, dst;
+ uint32_t blit_bb, noop_bb;
+
+ igt_require(HAS_BLT_RING(intel_get_drm_devid(fd)));
+
+ ctx_id = gem_context_create(fd);
+
+ /* Create some bos for the batch buffers to operate on. */
+ src = gem_create(fd, BO_SIZE);
+ dst = gem_create(fd, BO_SIZE);
+
+ /* Submit a long running batch. */
+ blit_bb = blit(fd, dst, src, 0);
+
+ /* Submit a quick batch referencing the same object. */
+ noop_bb = noop(fd, src, ctx_id);
+
+ /* Wait for the quick batch to complete. */
+ gem_sync(fd, noop_bb);
+ gem_close(fd, noop_bb);
+
+ /* Now destroy the context in which the quick batch was submitted. */
+ gem_context_destroy(fd, ctx_id);
+
+ /* Wait for the slow batch to finish and clean up. */
+ gem_sync(fd, blit_bb);
+ gem_close(fd, blit_bb);
+
+ gem_close(fd, src);
+ gem_close(fd, dst);
+}
+
+int fd;
+
+int main(int argc, char **argv)
+{
+ igt_subtest_init(argc, argv);
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ gem_require_contexts(fd);
+ }
+
+ igt_subtest("retire-vma-not-inactive")
+ test_retire_vma_not_inactive(fd);
+
+ igt_exit();
+}
diff --git a/tests/i915/gem_reset_stats.c b/tests/i915/gem_reset_stats.c
new file mode 100644
index 00000000..ac9af23f
--- /dev/null
+++ b/tests/i915/gem_reset_stats.c
@@ -0,0 +1,854 @@
+/*
+ * Copyright (c) 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Mika Kuoppala <mika.kuoppala@intel.com>
+ *
+ */
+
+#include "igt.h"
+#include "igt_sysfs.h"
+#include <limits.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <signal.h>
+
+
+#define RS_NO_ERROR 0
+#define RS_BATCH_ACTIVE (1 << 0)
+#define RS_BATCH_PENDING (1 << 1)
+#define RS_UNKNOWN (1 << 2)
+
+
+static uint32_t devid;
+
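+/*
+ * Local copy of the kernel's struct drm_i915_reset_stats and its ioctl
+ * number; the "local_" prefix is the usual IGT convention for uapi
+ * definitions that may be missing from older installed headers.
+ */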
+struct local_drm_i915_reset_stats {
+ __u32 ctx_id;
+ __u32 flags;
+ __u32 reset_count;
+ __u32 batch_active;
+ __u32 batch_pending;
+ __u32 pad;
+};
+
+#define MAX_FD 32
+
+#define GET_RESET_STATS_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x32, struct local_drm_i915_reset_stats)
+
+#define LOCAL_I915_EXEC_VEBOX (4 << 0)
+
+static void sync_gpu(void)
+{
+ int fd = drm_open_driver(DRIVER_INTEL);
+ gem_quiescent_gpu(fd);
+ close(fd);
+}
+
+static int noop(int fd, uint32_t ctx, const struct intel_execution_engine *e)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 eb;
+ struct drm_i915_gem_exec_object2 exec;
+ int ret;
+
+ memset(&exec, 0, sizeof(exec));
+ exec.handle = gem_create(fd, 4096);
+ igt_assert((int)exec.handle > 0);
+ gem_write(fd, exec.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&eb, 0, sizeof(eb));
+ eb.buffers_ptr = to_user_pointer(&exec);
+ eb.buffer_count = 1;
+ eb.flags = e->exec_id | e->flags;
+ i915_execbuffer2_set_context_id(eb, ctx);
+
+ ret = __gem_execbuf(fd, &eb);
+ if (ret < 0) {
+ gem_close(fd, exec.handle);
+ return ret;
+ }
+
+ return exec.handle;
+}
+
+static int has_engine(int fd,
+ uint32_t ctx,
+ const struct intel_execution_engine *e)
+{
+ int handle = noop(fd, ctx, e);
+ if (handle < 0)
+ return 0;
+ gem_close(fd, handle);
+ return 1;
+}
+
+static void check_context(const struct intel_execution_engine *e)
+{
+ int fd = drm_open_driver(DRIVER_INTEL);
+
+ gem_require_contexts(fd);
+ igt_require(has_engine(fd, gem_context_create(fd), e));
+
+ close(fd);
+}
+
+static int gem_reset_stats(int fd, int ctx_id,
+ struct local_drm_i915_reset_stats *rs)
+{
+ memset(rs, 0, sizeof(*rs));
+ rs->ctx_id = ctx_id;
+ rs->reset_count = -1;
+
+ if (drmIoctl(fd, GET_RESET_STATS_IOCTL, rs))
+ return -errno;
+
+ igt_assert(rs->reset_count != -1);
+ return 0;
+}
+
+static int gem_reset_status(int fd, int ctx_id)
+{
+ struct local_drm_i915_reset_stats rs;
+ int ret;
+
+ ret = gem_reset_stats(fd, ctx_id, &rs);
+ if (ret)
+ return ret;
+
+ if (rs.batch_active)
+ return RS_BATCH_ACTIVE;
+ if (rs.batch_pending)
+ return RS_BATCH_PENDING;
+
+ return RS_NO_ERROR;
+}
+
+static struct timespec ts_injected;
+
+#define BAN HANG_ALLOW_BAN
+#define ASYNC 2
+static void inject_hang(int fd, uint32_t ctx,
+ const struct intel_execution_engine *e,
+ unsigned flags)
+{
+ igt_hang_t hang;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_injected);
+
+ hang = igt_hang_ctx(fd, ctx, e->exec_id | e->flags, flags & BAN);
+ if ((flags & ASYNC) == 0)
+ igt_post_hang_ring(fd, hang);
+}
+
+static const char *status_to_string(int x)
+{
+ const char *strings[] = {
+ "No error",
+ "Guilty",
+ "Pending",
+ };
+ if (x >= ARRAY_SIZE(strings))
+ return "Unknown";
+ return strings[x];
+}
+
+static int _assert_reset_status(int idx, int fd, int ctx, int status)
+{
+ int rs;
+
+ rs = gem_reset_status(fd, ctx);
+ if (rs < 0) {
+ igt_info("reset status for %d ctx %d returned %d\n",
+ idx, ctx, rs);
+ return rs;
+ }
+
+ if (rs != status) {
+ igt_info("%d:%d expected '%s' [%d], found '%s' [%d]\n",
+ idx, ctx,
+ status_to_string(status), status,
+ status_to_string(rs), rs);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+#define assert_reset_status(idx, fd, ctx, status) \
+ igt_assert(_assert_reset_status(idx, fd, ctx, status) == 0)
+
+static void test_rs(const struct intel_execution_engine *e,
+ int num_fds, int hang_index, int rs_assumed_no_hang)
+{
+ int fd[MAX_FD];
+ int i;
+
+ igt_assert_lte(num_fds, MAX_FD);
+ igt_assert_lt(hang_index, MAX_FD);
+
+ igt_debug("num fds=%d, hang index=%d\n", num_fds, hang_index);
+
+ for (i = 0; i < num_fds; i++) {
+ fd[i] = drm_open_driver(DRIVER_INTEL);
+ assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
+ }
+
+ sync_gpu();
+ for (i = 0; i < num_fds; i++) {
+ if (i == hang_index)
+ inject_hang(fd[i], 0, e, ASYNC);
+ else
+ igt_assert(noop(fd[i], 0, e) > 0);
+ }
+ sync_gpu();
+
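+	/* Only the fd that submitted the hanging batch should be reported as
+	 * guilty (batch_active); every other fd must still read back clean.
+	 */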
+ for (i = 0; i < num_fds; i++) {
+ if (hang_index < 0) {
+ assert_reset_status(i, fd[i], 0, rs_assumed_no_hang);
+ continue;
+ }
+
+ if (i < hang_index)
+ assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
+ if (i == hang_index)
+ assert_reset_status(i, fd[i], 0, RS_BATCH_ACTIVE);
+ if (i > hang_index)
+ assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
+ }
+
+ igt_assert(igt_seconds_elapsed(&ts_injected) <= 30);
+
+ for (i = 0; i < num_fds; i++)
+ close(fd[i]);
+}
+
+#define MAX_CTX 100
+static void test_rs_ctx(const struct intel_execution_engine *e,
+ int num_fds, int num_ctx, int hang_index,
+ int hang_context)
+{
+ int i, j;
+ int fd[MAX_FD];
+ int ctx[MAX_FD][MAX_CTX];
+
+ igt_assert_lte(num_fds, MAX_FD);
+ igt_assert_lt(hang_index, MAX_FD);
+
+ igt_assert_lte(num_ctx, MAX_CTX);
+ igt_assert_lt(hang_context, MAX_CTX);
+
+ test_rs(e, num_fds, -1, RS_NO_ERROR);
+
+ for (i = 0; i < num_fds; i++) {
+ fd[i] = drm_open_driver(DRIVER_INTEL);
+ igt_assert(fd[i]);
+ assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
+
+ for (j = 0; j < num_ctx; j++) {
+ ctx[i][j] = gem_context_create(fd[i]);
+ }
+
+ assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
+ }
+
+ for (i = 0; i < num_fds; i++) {
+ assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
+
+ for (j = 0; j < num_ctx; j++)
+ assert_reset_status(i, fd[i], ctx[i][j], RS_NO_ERROR);
+
+ assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
+ }
+
+ for (i = 0; i < num_fds; i++) {
+ for (j = 0; j < num_ctx; j++) {
+ if (i == hang_index && j == hang_context)
+ inject_hang(fd[i], ctx[i][j], e, ASYNC);
+ else
+ igt_assert(noop(fd[i], ctx[i][j], e) > 0);
+ }
+ }
+ sync_gpu();
+
+ igt_assert(igt_seconds_elapsed(&ts_injected) <= 30);
+
+ for (i = 0; i < num_fds; i++)
+ assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
+
+ for (i = 0; i < num_fds; i++) {
+ for (j = 0; j < num_ctx; j++) {
+ if (i < hang_index)
+ assert_reset_status(i, fd[i], ctx[i][j], RS_NO_ERROR);
+ if (i == hang_index && j < hang_context)
+ assert_reset_status(i, fd[i], ctx[i][j], RS_NO_ERROR);
+ if (i == hang_index && j == hang_context)
+ assert_reset_status(i, fd[i], ctx[i][j],
+ RS_BATCH_ACTIVE);
+ if (i == hang_index && j > hang_context)
+ assert_reset_status(i, fd[i], ctx[i][j],
+ RS_NO_ERROR);
+ if (i > hang_index)
+ assert_reset_status(i, fd[i], ctx[i][j],
+ RS_NO_ERROR);
+ }
+ }
+
+ for (i = 0; i < num_fds; i++) {
+ assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
+ close(fd[i]);
+ }
+}
+
+static void test_ban(const struct intel_execution_engine *e)
+{
+ struct local_drm_i915_reset_stats rs_bad, rs_good;
+ int fd_bad, fd_good;
+ int ban, retry = 10;
+ int active_count = 0;
+
+ fd_bad = drm_open_driver(DRIVER_INTEL);
+ fd_good = drm_open_driver(DRIVER_INTEL);
+
+ assert_reset_status(fd_bad, fd_bad, 0, RS_NO_ERROR);
+ assert_reset_status(fd_good, fd_good, 0, RS_NO_ERROR);
+
+ noop(fd_bad, 0, e);
+ noop(fd_good, 0, e);
+
+ assert_reset_status(fd_bad, fd_bad, 0, RS_NO_ERROR);
+ assert_reset_status(fd_good, fd_good, 0, RS_NO_ERROR);
+
+ inject_hang(fd_bad, 0, e, BAN | ASYNC);
+ active_count++;
+
+ noop(fd_good, 0, e);
+ noop(fd_good, 0, e);
+
+ while (retry--) {
+ inject_hang(fd_bad, 0, e, BAN);
+ active_count++;
+
+ ban = noop(fd_bad, 0, e);
+ if (ban == -EIO)
+ break;
+
+		/* Should not happen often, but occasionally the hang is
+		 * detected too slowly because of how we fake it with a loop. */
+ gem_close(fd_bad, ban);
+
+ igt_info("retrying for ban (%d)\n", retry);
+ }
+ igt_assert_eq(ban, -EIO);
+ igt_assert_lt(0, noop(fd_good, 0, e));
+
+ assert_reset_status(fd_bad, fd_bad, 0, RS_BATCH_ACTIVE);
+ igt_assert_eq(gem_reset_stats(fd_bad, 0, &rs_bad), 0);
+ igt_assert_eq(rs_bad.batch_active, active_count);
+
+ assert_reset_status(fd_good, fd_good, 0, RS_NO_ERROR);
+ igt_assert_eq(gem_reset_stats(fd_good, 0, &rs_good), 0);
+ igt_assert_eq(rs_good.batch_active, 0);
+
+ close(fd_bad);
+ close(fd_good);
+}
+
+static void test_ban_ctx(const struct intel_execution_engine *e)
+{
+ struct local_drm_i915_reset_stats rs_bad, rs_good;
+ int fd, ban, retry = 10;
+ uint32_t ctx_good, ctx_bad;
+ int active_count = 0;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ assert_reset_status(fd, fd, 0, RS_NO_ERROR);
+
+ ctx_good = gem_context_create(fd);
+ ctx_bad = gem_context_create(fd);
+
+ assert_reset_status(fd, fd, 0, RS_NO_ERROR);
+ assert_reset_status(fd, fd, ctx_good, RS_NO_ERROR);
+ assert_reset_status(fd, fd, ctx_bad, RS_NO_ERROR);
+
+ noop(fd, ctx_bad, e);
+ noop(fd, ctx_good, e);
+
+ assert_reset_status(fd, fd, ctx_good, RS_NO_ERROR);
+ assert_reset_status(fd, fd, ctx_bad, RS_NO_ERROR);
+
+ inject_hang(fd, ctx_bad, e, BAN | ASYNC);
+ active_count++;
+
+ noop(fd, ctx_good, e);
+ noop(fd, ctx_good, e);
+
+ while (retry--) {
+ inject_hang(fd, ctx_bad, e, BAN);
+ active_count++;
+
+ ban = noop(fd, ctx_bad, e);
+ if (ban == -EIO)
+ break;
+
+		/* Should not happen often, but occasionally the hang is
+		 * detected too slowly because of how we fake it with a loop. */
+ gem_close(fd, ban);
+
+ igt_info("retrying for ban (%d)\n", retry);
+ }
+ igt_assert_eq(ban, -EIO);
+ igt_assert_lt(0, noop(fd, ctx_good, e));
+
+ assert_reset_status(fd, fd, ctx_bad, RS_BATCH_ACTIVE);
+ igt_assert_eq(gem_reset_stats(fd, ctx_bad, &rs_bad), 0);
+ igt_assert_eq(rs_bad.batch_active, active_count);
+
+ assert_reset_status(fd, fd, ctx_good, RS_NO_ERROR);
+ igt_assert_eq(gem_reset_stats(fd, ctx_good, &rs_good), 0);
+ igt_assert_eq(rs_good.batch_active, 0);
+
+ close(fd);
+}
+
+static void test_unrelated_ctx(const struct intel_execution_engine *e)
+{
+ int fd1,fd2;
+ int ctx_guilty, ctx_unrelated;
+
+ fd1 = drm_open_driver(DRIVER_INTEL);
+ fd2 = drm_open_driver(DRIVER_INTEL);
+ assert_reset_status(0, fd1, 0, RS_NO_ERROR);
+ assert_reset_status(1, fd2, 0, RS_NO_ERROR);
+ ctx_guilty = gem_context_create(fd1);
+ ctx_unrelated = gem_context_create(fd2);
+
+ assert_reset_status(0, fd1, ctx_guilty, RS_NO_ERROR);
+ assert_reset_status(1, fd2, ctx_unrelated, RS_NO_ERROR);
+
+ inject_hang(fd1, ctx_guilty, e, 0);
+ assert_reset_status(0, fd1, ctx_guilty, RS_BATCH_ACTIVE);
+ assert_reset_status(1, fd2, ctx_unrelated, RS_NO_ERROR);
+
+ gem_sync(fd2, noop(fd2, ctx_unrelated, e));
+ assert_reset_status(0, fd1, ctx_guilty, RS_BATCH_ACTIVE);
+ assert_reset_status(1, fd2, ctx_unrelated, RS_NO_ERROR);
+
+ close(fd1);
+ close(fd2);
+}
+
+static int get_reset_count(int fd, int ctx)
+{
+ int ret;
+ struct local_drm_i915_reset_stats rs;
+
+ ret = gem_reset_stats(fd, ctx, &rs);
+ if (ret)
+ return ret;
+
+ return rs.reset_count;
+}
+
+static void test_close_pending_ctx(const struct intel_execution_engine *e)
+{
+ int fd = drm_open_driver(DRIVER_INTEL);
+ uint32_t ctx = gem_context_create(fd);
+
+ assert_reset_status(fd, fd, ctx, RS_NO_ERROR);
+
+ inject_hang(fd, ctx, e, 0);
+ gem_context_destroy(fd, ctx);
+ igt_assert_eq(__gem_context_destroy(fd, ctx), -ENOENT);
+
+ close(fd);
+}
+
+static void test_close_pending(const struct intel_execution_engine *e)
+{
+ int fd = drm_open_driver(DRIVER_INTEL);
+
+ assert_reset_status(fd, fd, 0, RS_NO_ERROR);
+
+ inject_hang(fd, 0, e, 0);
+ close(fd);
+}
+
+static void noop_on_each_ring(int fd, const bool reverse)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 eb;
+ struct drm_i915_gem_exec_object2 obj;
+ const struct intel_execution_engine *e;
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&eb, 0, sizeof(eb));
+ eb.buffers_ptr = to_user_pointer(&obj);
+ eb.buffer_count = 1;
+
+ if (reverse) {
+ for (e = intel_execution_engines; e->name; e++)
+ ;
+ while (--e >= intel_execution_engines) {
+ eb.flags = e->exec_id | e->flags;
+ __gem_execbuf(fd, &eb);
+ }
+ } else {
+ for (e = intel_execution_engines; e->name; e++) {
+ eb.flags = e->exec_id | e->flags;
+ __gem_execbuf(fd, &eb);
+ }
+ }
+
+ gem_sync(fd, obj.handle);
+ gem_close(fd, obj.handle);
+}
+
+static void test_close_pending_fork(const struct intel_execution_engine *e,
+ const bool reverse)
+{
+ int fd = drm_open_driver(DRIVER_INTEL);
+ igt_hang_t hang;
+ int pid;
+
+ assert_reset_status(fd, fd, 0, RS_NO_ERROR);
+
+ hang = igt_hang_ctx(fd, 0, e->exec_id | e->flags, 0);
+ sleep(1);
+
+ /* Avoid helpers as we need to kill the child
+ * without any extra signal handling on behalf of
+ * lib/drmtest.c
+ */
+ pid = fork();
+ if (pid == 0) {
+ const int fd2 = drm_open_driver(DRIVER_INTEL);
+ igt_assert_lte(0, fd2);
+
+ /* The crucial component is that we schedule the same noop batch
+		 * on each ring. This exercises batch_obj reference counting
+		 * when the GPU is reset and the ring lists are cleared.
+ */
+ noop_on_each_ring(fd2, reverse);
+ close(fd2);
+ pause();
+ exit(0);
+ } else {
+ igt_assert_lt(0, pid);
+ sleep(1);
+
+		/* Kill the child to drop its references on the batch_objs. */
+ kill(pid, SIGKILL);
+ }
+
+ igt_post_hang_ring(fd, hang);
+ close(fd);
+}
+
+static void test_reset_count(const struct intel_execution_engine *e,
+ const bool create_ctx)
+{
+ int fd = drm_open_driver(DRIVER_INTEL);
+ int ctx;
+ long c1, c2;
+
+ if (create_ctx)
+ ctx = gem_context_create(fd);
+ else
+ ctx = 0;
+
+ assert_reset_status(fd, fd, ctx, RS_NO_ERROR);
+
+ c1 = get_reset_count(fd, ctx);
+ igt_assert(c1 >= 0);
+
+ inject_hang(fd, ctx, e, 0);
+
+ assert_reset_status(fd, fd, ctx, RS_BATCH_ACTIVE);
+ c2 = get_reset_count(fd, ctx);
+ igt_assert(c2 >= 0);
+ igt_assert(c2 == (c1 + 1));
+
+ igt_fork(child, 1) {
+ igt_drop_root();
+
+ c2 = get_reset_count(fd, ctx);
+
+ igt_assert(c2 == 0);
+ }
+
+ igt_waitchildren();
+
+ if (create_ctx)
+ gem_context_destroy(fd, ctx);
+
+ close(fd);
+}
+
+static int _test_params(int fd, int ctx, uint32_t flags, uint32_t pad)
+{
+ struct local_drm_i915_reset_stats rs;
+
+ memset(&rs, 0, sizeof(rs));
+ rs.ctx_id = ctx;
+ rs.flags = flags;
+ rs.reset_count = rand();
+ rs.batch_active = rand();
+ rs.batch_pending = rand();
+ rs.pad = pad;
+
+ if (drmIoctl(fd, GET_RESET_STATS_IOCTL, &rs))
+ return -errno;
+
+ return 0;
+}
+
+typedef enum { root = 0, user } cap_t;
+
+static void _check_param_ctx(const int fd, const int ctx, const cap_t cap)
+{
+ const uint32_t bad = rand() + 1;
+
+ if (ctx == 0) {
+ igt_assert_eq(_test_params(fd, ctx, 0, 0), 0);
+
+ if (cap != root) {
+ igt_assert(get_reset_count(fd, ctx) == 0);
+ }
+ }
+
+ igt_assert_eq(_test_params(fd, ctx, 0, bad), -EINVAL);
+ igt_assert_eq(_test_params(fd, ctx, bad, 0), -EINVAL);
+ igt_assert_eq(_test_params(fd, ctx, bad, bad), -EINVAL);
+}
+
+static void check_params(const int fd, const int ctx, cap_t cap)
+{
+ igt_assert(ioctl(fd, GET_RESET_STATS_IOCTL, 0) == -1);
+ igt_assert_eq(_test_params(fd, 0xbadbad, 0, 0), -ENOENT);
+
+ _check_param_ctx(fd, ctx, cap);
+}
+
+static void _test_param(const int fd, const int ctx)
+{
+ check_params(fd, ctx, root);
+
+ igt_fork(child, 1) {
+ check_params(fd, ctx, root);
+
+ igt_drop_root();
+
+ check_params(fd, ctx, user);
+ }
+
+ check_params(fd, ctx, root);
+
+ igt_waitchildren();
+}
+
+static void test_params_ctx(void)
+{
+ int fd;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ _test_param(fd, gem_context_create(fd));
+ close(fd);
+}
+
+static void test_params(void)
+{
+ int fd;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ _test_param(fd, 0);
+ close(fd);
+}
+
+static const struct intel_execution_engine *
+next_engine(int fd, const struct intel_execution_engine *e)
+{
+ do {
+ e++;
+ if (e->name == NULL)
+ e = intel_execution_engines;
+ if (e->exec_id == 0)
+ e++;
+ } while (!has_engine(fd, 0, e));
+
+ return e;
+}
+
+static void defer_hangcheck(const struct intel_execution_engine *engine)
+{
+ const struct intel_execution_engine *next;
+ int fd, count_start, count_end;
+ int seconds = 30;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ next = next_engine(fd, engine);
+ igt_skip_on(next == engine);
+
+ count_start = get_reset_count(fd, 0);
+ igt_assert_lte(0, count_start);
+
+ inject_hang(fd, 0, engine, 0);
+ while (--seconds) {
+ noop(fd, 0, next);
+
+ count_end = get_reset_count(fd, 0);
+ igt_assert_lte(0, count_end);
+
+ if (count_end > count_start)
+ break;
+
+ sleep(1);
+ }
+
+ igt_assert_lt(count_start, count_end);
+
+ close(fd);
+}
+
+static bool gem_has_reset_stats(int fd)
+{
+ struct local_drm_i915_reset_stats rs;
+ int ret;
+
+	/* Carefully set flags and pad to zero, otherwise we get -EINVAL. */
+ memset(&rs, 0, sizeof(rs));
+
+ ret = drmIoctl(fd, GET_RESET_STATS_IOCTL, &rs);
+ if (ret == 0)
+ return true;
+
+	/* If we get EPERM, the ioctl is supported but we lack CAP_SYS_ADMIN. */
+ if (ret == -1 && errno == EPERM)
+ return true;
+
+ return false;
+}
+
+#define RUN_TEST(...) do { sync_gpu(); __VA_ARGS__; sync_gpu(); } while (0)
+#define RUN_CTX_TEST(...) do { check_context(e); RUN_TEST(__VA_ARGS__); } while (0)
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ int fd;
+
+ bool has_reset_stats;
+ bool using_full_reset;
+ fd = drm_open_driver(DRIVER_INTEL);
+ devid = intel_get_drm_devid(fd);
+
+ has_reset_stats = gem_has_reset_stats(fd);
+
+ igt_assert(igt_sysfs_set_parameter
+ (fd, "reset", "%d", 1 /* only global reset */));
+
+ using_full_reset = !gem_engine_reset_enabled(fd) &&
+ gem_gpu_reset_enabled(fd);
+
+ close(fd);
+
+ igt_require_f(has_reset_stats,
+ "No reset stats ioctl support. Too old kernel?\n");
+ igt_require_f(using_full_reset,
+ "Full GPU reset is not enabled. Is enable_hangcheck set?\n");
+ }
+
+ igt_subtest("params")
+ test_params();
+
+ igt_subtest_f("params-ctx")
+ RUN_TEST(test_params_ctx());
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_f("reset-stats-%s", e->name)
+ RUN_TEST(test_rs(e, 4, 1, 0));
+
+ igt_subtest_f("reset-stats-ctx-%s", e->name)
+ RUN_CTX_TEST(test_rs_ctx(e, 4, 4, 1, 2));
+
+ igt_subtest_f("ban-%s", e->name)
+ RUN_TEST(test_ban(e));
+
+ igt_subtest_f("ban-ctx-%s", e->name)
+ RUN_CTX_TEST(test_ban_ctx(e));
+
+ igt_subtest_f("reset-count-%s", e->name)
+ RUN_TEST(test_reset_count(e, false));
+
+ igt_subtest_f("reset-count-ctx-%s", e->name)
+ RUN_CTX_TEST(test_reset_count(e, true));
+
+ igt_subtest_f("unrelated-ctx-%s", e->name)
+ RUN_CTX_TEST(test_unrelated_ctx(e));
+
+ igt_subtest_f("close-pending-%s", e->name)
+ RUN_TEST(test_close_pending(e));
+
+ igt_subtest_f("close-pending-ctx-%s", e->name)
+ RUN_CTX_TEST(test_close_pending_ctx(e));
+
+ igt_subtest_f("close-pending-fork-%s", e->name)
+ RUN_TEST(test_close_pending_fork(e, false));
+
+ igt_subtest_f("close-pending-fork-reverse-%s", e->name)
+ RUN_TEST(test_close_pending_fork(e, true));
+
+ igt_subtest_f("defer-hangcheck-%s", e->name)
+ RUN_TEST(defer_hangcheck(e));
+ }
+
+ igt_fixture {
+ int fd;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_assert(igt_sysfs_set_parameter
+ (fd, "reset", "%d", INT_MAX /* any reset method */));
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_ring_sync_copy.c b/tests/i915/gem_ring_sync_copy.c
new file mode 100644
index 00000000..8d372355
--- /dev/null
+++ b/tests/i915/gem_ring_sync_copy.c
@@ -0,0 +1,374 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Damien Lespiau <damien.lespiau@intel.com>
+ */
+
+/*
+ * The goal of this test is to ensure that we respect inter-ring dependencies.
+ *
+ * For each pair of rings R1, R2 where we have copy support (i.e. blt,
+ * rendercpy and mediafill) do:
+ * - Throw a busy load onto R1. gem_concurrent_blt just uses lots of buffers
+ * for this effect.
+ * - Fill three buffers A, B, C with unique data.
+ * - Copy A to B on ring R1
+ *
+ * Then come the three different variants.
+ * - Copy B to C on ring R2, check that C now contains what A originally
+ * contained. This is the write->read hazard. gem_concurrent_blt calls this
+ * early read.
+ * - Copy C to A on ring R2, check that B now contains what A originally
+ *   contained. This is the read->write hazard; gem_concurrent_blt calls it
+ * overwrite_source.
+ * - Copy C to B on ring R2 and check that B contains what C originally
+ * contained. This is the write/write hazard. gem_concurrent_blt doesn't
+ * have that since for the cpu case it's too boring.
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdbool.h>
+
+
+IGT_TEST_DESCRIPTION("Ensure inter-ring dependencies are respected.");
+
+#define WIDTH 512
+#define HEIGHT 512
+#define NUM_BUSY_BUFFERS 32
+
+typedef struct {
+ int drm_fd;
+ uint32_t devid;
+ drm_intel_bufmgr *bufmgr;
+ struct intel_batchbuffer *batch;
+
+ /* number of buffers to keep the ring busy for a while */
+ unsigned int n_buffers_load;
+
+ uint32_t linear[WIDTH * HEIGHT];
+
+ struct {
+ igt_render_copyfunc_t copy;
+ struct igt_buf *srcs;
+ struct igt_buf *dsts;
+ } render;
+
+ struct {
+ drm_intel_bo **srcs;
+ drm_intel_bo **dsts;
+ } blitter;
+
+} data_t;
+
+enum ring {
+ RENDER,
+ BLITTER,
+};
+
+enum test {
+ TEST_WRITE_READ,
+ TEST_READ_WRITE,
+ TEST_WRITE_WRITE,
+};
+
+static const char *ring_name(enum ring ring)
+{
+ const char *names[] = {
+ "render",
+ "blitter",
+ };
+
+ return names[ring];
+}
+
+static drm_intel_bo *bo_create(data_t *data, int width, int height, int val)
+{
+ drm_intel_bo *bo;
+ int i;
+
+ bo = drm_intel_bo_alloc(data->bufmgr, "", 4 * width * height, 4096);
+ igt_assert(bo);
+
+ for (i = 0; i < width * height; i++)
+ data->linear[i] = val;
+ gem_write(data->drm_fd, bo->handle, 0, data->linear,
+ sizeof(data->linear));
+
+ return bo;
+}
+
+static void bo_check(data_t *data, drm_intel_bo *bo, uint32_t val)
+{
+ int i;
+
+ gem_read(data->drm_fd, bo->handle, 0,
+ data->linear, sizeof(data->linear));
+ for (i = 0; i < WIDTH * HEIGHT; i++)
+ igt_assert_eq_u32(data->linear[i], val);
+}
+
+static void scratch_buf_init_from_bo(struct igt_buf *buf, drm_intel_bo *bo)
+{
+ memset(buf, 0, sizeof(*buf));
+
+ buf->bo = bo;
+ buf->stride = 4 * WIDTH;
+ buf->tiling = I915_TILING_NONE;
+ buf->size = 4 * WIDTH * HEIGHT;
+}
+
+static void scratch_buf_init(data_t *data, struct igt_buf *buf,
+ int width, int height, uint32_t color)
+{
+ drm_intel_bo *bo;
+
+ bo = bo_create(data, width, height, color);
+ scratch_buf_init_from_bo(buf, bo);
+}
+
+/*
+ * Provide a few ring-specific vfuncs for run_test().
+ *
+ * busy()	Queue n_buffers_load workloads onto the ring to keep it busy
+ * busy_fini() Clean up after busy
+ * copy() Copy one BO to another
+ */
+
+/*
+ * Render ring
+ */
+
+static void render_busy(data_t *data)
+{
+ size_t array_size;
+ int i;
+
+ /* allocate 32 buffer objects and re-use them as needed */
+ array_size = NUM_BUSY_BUFFERS * sizeof(struct igt_buf);
+
+ data->render.srcs = malloc(array_size);
+ data->render.dsts = malloc(array_size);
+
+ for (i = 0; i < NUM_BUSY_BUFFERS; i++) {
+ scratch_buf_init(data, &data->render.srcs[i], WIDTH, HEIGHT,
+ 0xdeadbeef);
+ scratch_buf_init(data, &data->render.dsts[i], WIDTH, HEIGHT,
+ 0xdeadbeef);
+ }
+
+ for (i = 0; i < data->n_buffers_load; i++) {
+ data->render.copy(data->batch,
+ NULL, /* context */
+ &data->render.srcs[i % NUM_BUSY_BUFFERS],
+ 0, 0, /* src_x, src_y */
+ WIDTH, HEIGHT,
+ &data->render.dsts[i % NUM_BUSY_BUFFERS],
+ 0, 0 /* dst_x, dst_y */);
+ }
+}
+
+static void render_busy_fini(data_t *data)
+{
+ int i;
+
+ for (i = 0; i < NUM_BUSY_BUFFERS; i++) {
+ drm_intel_bo_unreference(data->render.srcs[i].bo);
+ drm_intel_bo_unreference(data->render.dsts[i].bo);
+ }
+
+ free(data->render.srcs);
+ free(data->render.dsts);
+ data->render.srcs = NULL;
+ data->render.dsts = NULL;
+}
+
+static void render_copy(data_t *data, drm_intel_bo *src, drm_intel_bo *dst)
+{
+ struct igt_buf src_buf, dst_buf;
+
+ scratch_buf_init_from_bo(&src_buf, src);
+ scratch_buf_init_from_bo(&dst_buf, dst);
+
+ data->render.copy(data->batch,
+ NULL, /* context */
+ &src_buf,
+ 0, 0, /* src_x, src_y */
+ WIDTH, HEIGHT,
+ &dst_buf,
+ 0, 0 /* dst_x, dst_y */);
+}
+
+/*
+ * Blitter ring
+ */
+
+static void blitter_busy(data_t *data)
+{
+ size_t array_size;
+ int i;
+
+ /* allocate 32 buffer objects and re-use them as needed */
+ array_size = NUM_BUSY_BUFFERS * sizeof(drm_intel_bo *);
+
+ data->blitter.srcs = malloc(array_size);
+ data->blitter.dsts = malloc(array_size);
+
+ for (i = 0; i < NUM_BUSY_BUFFERS; i++) {
+ data->blitter.srcs[i] = bo_create(data,
+ WIDTH, HEIGHT,
+ 0xdeadbeef);
+ data->blitter.dsts[i] = bo_create(data,
+ WIDTH, HEIGHT,
+ 0xdeadbeef);
+ }
+
+ for (i = 0; i < data->n_buffers_load; i++) {
+ intel_copy_bo(data->batch,
+ data->blitter.srcs[i % NUM_BUSY_BUFFERS],
+ data->blitter.dsts[i % NUM_BUSY_BUFFERS],
+ WIDTH*HEIGHT*4);
+ }
+}
+
+static void blitter_busy_fini(data_t *data)
+{
+ int i;
+
+ for (i = 0; i < NUM_BUSY_BUFFERS; i++) {
+ drm_intel_bo_unreference(data->blitter.srcs[i]);
+ drm_intel_bo_unreference(data->blitter.dsts[i]);
+ }
+
+ free(data->blitter.srcs);
+ free(data->blitter.dsts);
+ data->blitter.srcs = NULL;
+ data->blitter.dsts = NULL;
+}
+
+static void blitter_copy(data_t *data, drm_intel_bo *src, drm_intel_bo *dst)
+{
+ intel_copy_bo(data->batch, dst, src, WIDTH*HEIGHT*4);
+}
+
+struct ring_ops {
+ void (*busy)(data_t *data);
+ void (*busy_fini)(data_t *data);
+ void (*copy)(data_t *data, drm_intel_bo *src, drm_intel_bo *dst);
+} ops [] = {
+ {
+ .busy = render_busy,
+ .busy_fini = render_busy_fini,
+ .copy = render_copy,
+ },
+ {
+ .busy = blitter_busy,
+ .busy_fini = blitter_busy_fini,
+ .copy = blitter_copy,
+ },
+};
+
+static void run_test(data_t *data, enum ring r1, enum ring r2, enum test test)
+{
+ struct ring_ops *r1_ops = &ops[r1];
+ struct ring_ops *r2_ops = &ops[r2];
+ drm_intel_bo *a, *b, *c;
+
+ a = bo_create(data, WIDTH, HEIGHT, 0xa);
+ b = bo_create(data, WIDTH, HEIGHT, 0xb);
+ c = bo_create(data, WIDTH, HEIGHT, 0xc);
+
+ r1_ops->busy(data);
+ r1_ops->copy(data, a, b);
+
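+	/* R1 is still busy with the load and the A->B copy when the dependent
+	 * copy is queued on R2; the checks below only pass if R2 waited for
+	 * R1's writes to land.
+	 */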
+ switch (test) {
+ case TEST_WRITE_READ:
+ r2_ops->copy(data, b, c);
+ bo_check(data, c, 0xa);
+ break;
+ case TEST_READ_WRITE:
+ r2_ops->copy(data, c, a);
+ bo_check(data, b, 0xa);
+ break;
+ case TEST_WRITE_WRITE:
+ r2_ops->copy(data, c, b);
+ bo_check(data, b, 0xc);
+ break;
+ default:
+ igt_fail(IGT_EXIT_FAILURE);
+ }
+
+ r1_ops->busy_fini(data);
+}
+
+igt_main
+{
+ data_t data = {0, };
+ int i;
+ struct combination {
+ int r1, r2;
+ } ring_combinations [] = {
+ { RENDER, BLITTER },
+ { BLITTER, RENDER },
+ };
+
+ igt_fixture {
+ data.drm_fd = drm_open_driver_render(DRIVER_INTEL);
+ data.devid = intel_get_drm_devid(data.drm_fd);
+
+ data.n_buffers_load = 1000;
+
+ data.bufmgr = drm_intel_bufmgr_gem_init(data.drm_fd, 4096);
+ igt_assert(data.bufmgr);
+ drm_intel_bufmgr_gem_enable_reuse(data.bufmgr);
+
+ data.render.copy = igt_get_render_copyfunc(data.devid);
+ igt_require_f(data.render.copy,
+ "no render-copy function\n");
+
+ data.batch = intel_batchbuffer_alloc(data.bufmgr, data.devid);
+ igt_assert(data.batch);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ring_combinations); i++) {
+ struct combination *c = &ring_combinations[i];
+
+ igt_subtest_f("sync-%s-%s-write-read",
+ ring_name(c->r1), ring_name(c->r2))
+ run_test(&data, c->r1, c->r2, TEST_WRITE_READ);
+
+ igt_subtest_f("sync-%s-%s-read-write",
+ ring_name(c->r1), ring_name(c->r2))
+ run_test(&data, c->r1, c->r2, TEST_READ_WRITE);
+ igt_subtest_f("sync-%s-%s-write-write",
+ ring_name(c->r1), ring_name(c->r2))
+ run_test(&data, c->r1, c->r2, TEST_WRITE_WRITE);
+ }
+
+ igt_fixture {
+ intel_batchbuffer_free(data.batch);
+ drm_intel_bufmgr_destroy(data.bufmgr);
+ close(data.drm_fd);
+ }
+}
diff --git a/tests/i915/gem_ring_sync_loop.c b/tests/i915/gem_ring_sync_loop.c
new file mode 100644
index 00000000..118f3638
--- /dev/null
+++ b/tests/i915/gem_ring_sync_loop.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch> (based on gem_storedw_*.c)
+ *
+ */
+
+#include "igt.h"
+
+IGT_TEST_DESCRIPTION("Basic check of ring<->ring write synchronisation.");
+
+/*
+ * Testcase: Basic check of ring<->ring sync using a dummy reloc
+ *
+ * Extremely efficient at catching missed irqs with semaphores=0 ...
+ */
+
+static void
+sync_loop(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 object[2];
+ struct drm_i915_gem_relocation_entry reloc[1];
+ unsigned engines[16];
+ unsigned nengine;
+ unsigned engine;
+ int i;
+
+ nengine = 0;
+ for_each_physical_engine(fd, engine)
+ engines[nengine++] = engine;
+ igt_require(nengine);
+
+ memset(object, 0, sizeof(object));
+ object[0].handle = gem_create(fd, 4096);
+ object[0].flags = EXEC_OBJECT_WRITE;
+ object[1].handle = gem_create(fd, 4096);
+ gem_write(fd, object[1].handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(object);
+ execbuf.buffer_count = 2;
+
+ /* Check if we have no-reloc support first */
+ if (__gem_execbuf(fd, &execbuf)) {
+ object[0].flags = 0;
+ object[1].relocs_ptr = to_user_pointer(reloc);
+ object[1].relocation_count = 1;
+
+		/* Add a dummy relocation to mark the object as being written */
+ memset(reloc, 0, sizeof(reloc));
+ reloc->offset = 1000;
+ reloc->target_handle = object[0].handle;
+ reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc->write_domain = I915_GEM_DOMAIN_RENDER;
+
+ gem_execbuf(fd, &execbuf);
+ }
+
+ srandom(0xdeadbeef);
+
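+	/* Bounce the shared write target across the engines at random; each
+	 * submission must synchronise with the previous engine's write.
+	 */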
+ for (i = 0; i < SLOW_QUICK(0x100000, 10); i++) {
+ execbuf.flags = engines[rand() % nengine];
+ gem_execbuf(fd, &execbuf);
+ }
+
+ gem_sync(fd, object[1].handle);
+ gem_close(fd, object[1].handle);
+ gem_close(fd, object[0].handle);
+}
+
+igt_simple_main
+{
+ int fd;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ intel_detect_and_clear_missed_interrupts(fd);
+
+ sync_loop(fd);
+
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+ close(fd);
+}
diff --git a/tests/i915/gem_ringfill.c b/tests/i915/gem_ringfill.c
new file mode 100644
index 00000000..c728e1cd
--- /dev/null
+++ b/tests/i915/gem_ringfill.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_ringfill.c
+ *
+ * This is a test of doing many tiny batchbuffer operations, in the hope of
+ * catching failure to manage the ring properly near full.
+ */
+
+#include "igt.h"
+#include "igt_device.h"
+#include "igt_gt.h"
+#include "igt_vgem.h"
+#include "i915/gem_ring.h"
+
+#include <signal.h>
+#include <sys/ioctl.h>
+
+#define INTERRUPTIBLE 0x1
+#define HANG 0x2
+#define CHILD 0x8
+#define FORKED 0x8
+#define BOMB 0x10
+#define SUSPEND 0x20
+#define HIBERNATE 0x40
+#define NEWFD 0x80
+
+static unsigned int ring_size;
+
+static void check_bo(int fd, uint32_t handle)
+{
+ uint32_t *map;
+ int i;
+
+ igt_debug("Verifying result\n");
+ map = gem_mmap__cpu(fd, handle, 0, 4096, PROT_READ);
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, 0);
+ for (i = 0; i < 1024; i++)
+ igt_assert_eq(map[i], i);
+ munmap(map, 4096);
+}
+
+static void fill_ring(int fd,
+ struct drm_i915_gem_execbuffer2 *execbuf,
+ unsigned flags, unsigned timeout)
+{
+ /* The ring we've been using is 128k, and each rendering op
+ * will use at least 8 dwords:
+ *
+ * BATCH_START
+ * BATCH_START offset
+ * MI_FLUSH
+ * STORE_DATA_INDEX
+ * STORE_DATA_INDEX offset
+ * STORE_DATA_INDEX value
+ * MI_USER_INTERRUPT
+ * (padding)
+ *
+ * So iterate just a little more than that -- if we don't fill the ring
+ * doing this, we aren't likely to with this test.
+ */
+ igt_debug("Executing execbuf %d times\n", 128*1024/(8*4));
+ igt_until_timeout(timeout) {
+ igt_while_interruptible(flags & INTERRUPTIBLE) {
+ for (typeof(ring_size) i = 0; i < ring_size; i++)
+ gem_execbuf(fd, execbuf);
+ }
+ }
+}
+
+static int setup_execbuf(int fd,
+ struct drm_i915_gem_execbuffer2 *execbuf,
+ struct drm_i915_gem_exec_object2 *obj,
+ struct drm_i915_gem_relocation_entry *reloc,
+ unsigned int ring)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ uint32_t *batch, *b;
+ int ret;
+ int i;
+
+ memset(execbuf, 0, sizeof(*execbuf));
+ memset(obj, 0, 2*sizeof(*obj));
+ memset(reloc, 0, 1024*sizeof(*reloc));
+
+ execbuf->buffers_ptr = to_user_pointer(obj);
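+	/* (1 << 11) is I915_EXEC_NO_RELOC, (1 << 12) is I915_EXEC_HANDLE_LUT */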
+ execbuf->flags = ring | (1 << 11) | (1 << 12);
+
+ if (gen > 3 && gen < 6)
+ execbuf->flags |= I915_EXEC_SECURE;
+
+ obj[0].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[0].handle, 0, &bbe, sizeof(bbe));
+ execbuf->buffer_count = 1;
+ ret = __gem_execbuf(fd, execbuf);
+ if (ret)
+ return ret;
+
+ obj[0].flags |= EXEC_OBJECT_WRITE;
+ obj[1].handle = gem_create(fd, 1024*16 + 4096);
+
+ obj[1].relocs_ptr = to_user_pointer(reloc);
+ obj[1].relocation_count = 1024;
+
+ batch = gem_mmap__cpu(fd, obj[1].handle, 0, 16*1024 + 4096,
+ PROT_WRITE | PROT_READ);
+ gem_set_domain(fd, obj[1].handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+
+ b = batch;
+ for (i = 0; i < 1024; i++) {
+ uint64_t offset;
+
+ reloc[i].presumed_offset = obj[0].offset;
+ reloc[i].offset = (b - batch + 1) * sizeof(*batch);
+ reloc[i].delta = i * sizeof(uint32_t);
+ reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+
+ offset = obj[0].offset + reloc[i].delta;
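+		/* The MI_STORE_DWORD_IMM packet is gen dependent: gen8+ takes
+		 * a 64-bit address, gen4-7 take (0, 32-bit address), and
+		 * gen2/3 use a one-dword-shorter packet with a single address
+		 * dword; older gens additionally need the 1 << 22 bit set.
+		 */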
+ *b++ = MI_STORE_DWORD_IMM;
+ if (gen >= 8) {
+ *b++ = offset;
+ *b++ = offset >> 32;
+ } else if (gen >= 4) {
+ if (gen < 6)
+ b[-1] |= 1 << 22;
+ *b++ = 0;
+ *b++ = offset;
+ reloc[i].offset += sizeof(*batch);
+ } else {
+ b[-1] |= 1 << 22;
+ b[-1] -= 1;
+ *b++ = offset;
+ }
+ *b++ = i;
+ }
+ *b++ = MI_BATCH_BUFFER_END;
+ munmap(batch, 16*1024+4096);
+
+ execbuf->buffer_count = 2;
+ gem_execbuf(fd, execbuf);
+
+ check_bo(fd, obj[0].handle);
+ return 0;
+}
+
+static void run_test(int fd, unsigned ring, unsigned flags, unsigned timeout)
+{
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc[1024];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ igt_hang_t hang;
+
+ gem_require_ring(fd, ring);
+ igt_require(gem_can_store_dword(fd, ring));
+
+ if (flags & (SUSPEND | HIBERNATE))
+ run_test(fd, ring, 0, 0);
+
+ gem_quiescent_gpu(fd);
+ igt_require(setup_execbuf(fd, &execbuf, obj, reloc, ring) == 0);
+
+ memset(&hang, 0, sizeof(hang));
+ if (flags & HANG)
+ hang = igt_hang_ring(fd, ring & ~(3<<13));
+
+ if (flags & (CHILD | FORKED | BOMB)) {
+ int nchild;
+
+ if (flags & FORKED)
+ nchild = sysconf(_SC_NPROCESSORS_ONLN);
+ else if (flags & BOMB)
+ nchild = 8*sysconf(_SC_NPROCESSORS_ONLN);
+ else
+ nchild = 1;
+
+ igt_debug("Forking %d children\n", nchild);
+ igt_fork(child, nchild) {
+ if (flags & NEWFD) {
+ fd = drm_open_driver(DRIVER_INTEL);
+ setup_execbuf(fd, &execbuf, obj, reloc, ring);
+ }
+ fill_ring(fd, &execbuf, flags, timeout);
+ }
+
+ if (flags & SUSPEND)
+ igt_system_suspend_autoresume(SUSPEND_STATE_MEM,
+ SUSPEND_TEST_NONE);
+
+ if (flags & HIBERNATE)
+ igt_system_suspend_autoresume(SUSPEND_STATE_DISK,
+ SUSPEND_TEST_NONE);
+
+ if (flags & NEWFD)
+ fill_ring(fd, &execbuf, flags, timeout);
+
+ igt_waitchildren();
+ } else
+ fill_ring(fd, &execbuf, flags, timeout);
+
+ if (flags & HANG)
+ igt_post_hang_ring(fd, hang);
+ else
+ check_bo(fd, obj[0].handle);
+
+ gem_close(fd, obj[1].handle);
+ gem_close(fd, obj[0].handle);
+
+ gem_quiescent_gpu(fd);
+
+ if (flags & (SUSPEND | HIBERNATE))
+ run_test(fd, ring, 0, 0);
+}
+
+igt_main
+{
+ const struct {
+ const char *suffix;
+ unsigned flags;
+ unsigned timeout;
+ bool basic;
+ } modes[] = {
+ { "", 0, 0, true},
+ { "-interruptible", INTERRUPTIBLE, 1, true },
+ { "-hang", HANG, 10, true },
+ { "-child", CHILD, 0 },
+ { "-forked", FORKED, 0, true },
+ { "-fd", FORKED | NEWFD, 0, true },
+ { "-bomb", BOMB | NEWFD | INTERRUPTIBLE, 150 },
+ { "-S3", BOMB | SUSPEND, 30 },
+ { "-S4", BOMB | HIBERNATE, 30 },
+ { NULL }
+ }, *m;
+ bool master = false;
+ int fd = -1;
+
+ igt_fixture {
+ int gen;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ igt_require(gem_can_store_dword(fd, 0));
+ gen = intel_gen(intel_get_drm_devid(fd));
+ if (gen > 3 && gen < 6) { /* ctg and ilk need secure batches */
+ igt_device_set_master(fd);
+ master = true;
+ }
+
+ ring_size = gem_measure_ring_inflight(fd, ALL_ENGINES, 0);
+ igt_info("Ring size: %d batches\n", ring_size);
+ igt_require(ring_size);
+ }
+
+ for (m = modes; m->suffix; m++) {
+ const struct intel_execution_engine *e;
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_f("%s%s%s",
+ m->basic && !e->exec_id ? "basic-" : "",
+ e->name,
+ m->suffix) {
+ igt_skip_on(m->flags & NEWFD && master);
+ if (m->flags & (HANG|SUSPEND|HIBERNATE))
+ igt_skip_on_simulation();
+ run_test(fd, e->exec_id | e->flags,
+ m->flags, m->timeout);
+ }
+ }
+ }
+
+ igt_fixture
+ close(fd);
+}
diff --git a/tests/i915/gem_set_tiling_vs_blt.c b/tests/i915/gem_set_tiling_vs_blt.c
new file mode 100644
index 00000000..ae1af4ca
--- /dev/null
+++ b/tests/i915/gem_set_tiling_vs_blt.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/** @file gem_set_tiling_vs_blt.c
+ *
+ * Testcase: Check for proper synchronization of tiling changes vs. tiled gpu
+ * access
+ *
+ * The blitter on gen3 and earlier needs properly set up fences, which also
+ * means that for untiled blits we may not set up a fence before that blt has
+ * finished.
+ *
+ * Current kernels have a bug there, but it's pretty hard to hit because you
+ * need:
+ * - a blt on an untiled object which is aligned correctly for tiling.
+ * - a set_tiling to switch that object to tiling
+ * - another blt without any intervening cpu access that uses this object.
+ *
+ * Testcase has been extended to also check tiled->untiled and tiled->tiled
+ * transitions (i.e. changing stride).
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdbool.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("Check for proper synchronization of tiling changes vs."
+ " tiled gpu access.");
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+uint32_t devid;
+
+#define TEST_SIZE (1024*1024)
+#define TEST_STRIDE (4*1024)
+#define TEST_HEIGHT(stride) (TEST_SIZE/(stride))
+#define TEST_WIDTH(stride) ((stride)/4)
+
+uint32_t data[TEST_SIZE/4];
+
+static void do_test(uint32_t tiling, unsigned stride,
+ uint32_t tiling_after, unsigned stride_after)
+{
+ drm_intel_bo *busy_bo, *test_bo, *target_bo;
+ int i, ret;
+ uint32_t *ptr;
+ uint32_t test_bo_handle;
+ uint32_t blt_stride, blt_bits;
+ bool tiling_changed = false;
+
+ igt_info("filling ring .. ");
+ busy_bo = drm_intel_bo_alloc(bufmgr, "busy bo bo", 16*1024*1024, 4096);
+
+ for (i = 0; i < 250; i++) {
+ BLIT_COPY_BATCH_START(0);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ 2*1024*4);
+ OUT_BATCH(0 << 16 | 1024);
+ OUT_BATCH((2048) << 16 | (2048));
+ OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH(2*1024*4);
+ OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+
+ if (batch->gen >= 6) {
+ BEGIN_BATCH(3, 0);
+ OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
+ }
+ intel_batchbuffer_flush(batch);
+
+ igt_info("playing tricks .. ");
+	/* allocate the target first so that it stays out of the way while we
+	 * play funky tricks below */
+ target_bo = drm_intel_bo_alloc(bufmgr, "target bo", TEST_SIZE, 4096);
+
+	/* allocate a buffer with the parameters it will have _after_ the
+	 * transition we want to check, and touch it so that it's properly
+	 * aligned in the gtt. */
+ test_bo = drm_intel_bo_alloc(bufmgr, "tiled busy bo", TEST_SIZE, 4096);
+ test_bo_handle = test_bo->handle;
+ ret = drm_intel_bo_set_tiling(test_bo, &tiling_after, stride_after);
+ igt_assert_eq(ret, 0);
+ drm_intel_gem_bo_map_gtt(test_bo);
+ ptr = test_bo->virtual;
+ *ptr = 0;
+ ptr = NULL;
+ drm_intel_gem_bo_unmap_gtt(test_bo);
+
+ drm_intel_bo_unreference(test_bo);
+
+ test_bo = NULL;
+
+ /* note we need a bo bigger than batches, otherwise the buffer reuse
+ * trick will fail. */
+ test_bo = drm_intel_bo_alloc(bufmgr, "busy bo", TEST_SIZE, 4096);
+ /* double check that the reuse trick worked */
+ igt_assert(test_bo_handle == test_bo->handle);
+
+ test_bo_handle = test_bo->handle;
+ /* ensure we have the right tiling before we start. */
+ ret = drm_intel_bo_set_tiling(test_bo, &tiling, stride);
+ igt_assert_eq(ret, 0);
+
+ if (tiling == I915_TILING_NONE) {
+ drm_intel_bo_subdata(test_bo, 0, TEST_SIZE, data);
+ } else {
+ drm_intel_gem_bo_map_gtt(test_bo);
+ ptr = test_bo->virtual;
+ memcpy(ptr, data, TEST_SIZE);
+ ptr = NULL;
+ drm_intel_gem_bo_unmap_gtt(test_bo);
+ }
+
+ blt_stride = stride;
+ blt_bits = 0;
+ if (intel_gen(devid) >= 4 && tiling != I915_TILING_NONE) {
+ blt_stride /= 4;
+ blt_bits = XY_SRC_COPY_BLT_SRC_TILED;
+ }
+
+ BLIT_COPY_BATCH_START(blt_bits);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ stride);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH((TEST_HEIGHT(stride)) << 16 | (TEST_WIDTH(stride)));
+ OUT_RELOC_FENCED(target_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH(blt_stride);
+ OUT_RELOC_FENCED(test_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+ intel_batchbuffer_flush(batch);
+
+ drm_intel_bo_unreference(test_bo);
+
+ test_bo = drm_intel_bo_alloc_for_render(bufmgr, "tiled busy bo", TEST_SIZE, 4096);
+ /* double check that the reuse trick worked */
+ igt_assert(test_bo_handle == test_bo->handle);
+ ret = drm_intel_bo_set_tiling(test_bo, &tiling_after, stride_after);
+ igt_assert_eq(ret, 0);
+
+ /* Note: We don't care about gen4+ here because the blitter doesn't use
+ * fences there. So not setting tiling flags on the tiled buffer is ok.
+ */
+ BLIT_COPY_BATCH_START(0);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ stride_after);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH((1) << 16 | (1));
+ OUT_RELOC_FENCED(test_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH(stride_after);
+ OUT_RELOC_FENCED(test_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+ intel_batchbuffer_flush(batch);
+
+	/* Now try to trick the kernel into changing the fencing too early. */
+
+ igt_info("checking .. ");
+ memset(data, 0, TEST_SIZE);
+ drm_intel_bo_get_subdata(target_bo, 0, TEST_SIZE, data);
+ for (i = 0; i < TEST_SIZE/4; i++)
+ igt_assert(data[i] == i);
+
+ /* check whether tiling on the test_bo actually changed. */
+ drm_intel_gem_bo_map_gtt(test_bo);
+ ptr = test_bo->virtual;
+ for (i = 0; i < TEST_SIZE/4; i++)
+ if (ptr[i] != data[i])
+ tiling_changed = true;
+ ptr = NULL;
+ drm_intel_gem_bo_unmap_gtt(test_bo);
+ igt_assert(tiling_changed);
+
+ drm_intel_bo_unreference(test_bo);
+ drm_intel_bo_unreference(target_bo);
+ drm_intel_bo_unreference(busy_bo);
+ igt_info("done\n");
+}
+
+int fd;
+
+igt_main
+{
+ int i;
+ uint32_t tiling, tiling_after;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ for (i = 0; i < 1024*256; i++)
+ data[i] = i;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ devid = intel_get_drm_devid(fd);
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+ }
+
+ igt_subtest("untiled-to-tiled") {
+ tiling = I915_TILING_NONE;
+ tiling_after = I915_TILING_X;
+ do_test(tiling, TEST_STRIDE, tiling_after, TEST_STRIDE);
+ igt_assert(tiling == I915_TILING_NONE);
+ igt_assert(tiling_after == I915_TILING_X);
+ }
+
+ igt_subtest("tiled-to-untiled") {
+ tiling = I915_TILING_X;
+ tiling_after = I915_TILING_NONE;
+ do_test(tiling, TEST_STRIDE, tiling_after, TEST_STRIDE);
+ igt_assert(tiling == I915_TILING_X);
+ igt_assert(tiling_after == I915_TILING_NONE);
+ }
+
+ igt_subtest("tiled-to-tiled") {
+ tiling = I915_TILING_X;
+ tiling_after = I915_TILING_X;
+ do_test(tiling, TEST_STRIDE/2, tiling_after, TEST_STRIDE);
+ igt_assert(tiling == I915_TILING_X);
+ igt_assert(tiling_after == I915_TILING_X);
+ }
+}
diff --git a/tests/i915/gem_set_tiling_vs_gtt.c b/tests/i915/gem_set_tiling_vs_gtt.c
new file mode 100644
index 00000000..2611ec55
--- /dev/null
+++ b/tests/i915/gem_set_tiling_vs_gtt.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION("Check set_tiling vs gtt mmap coherency.");
+
+#define OBJECT_SIZE (1024*1024)
+#define TEST_STRIDE (1024*4)
+
+/**
+ * Testcase: Check set_tiling vs gtt mmap coherency
+ */
+
+igt_simple_main
+{
+ int fd;
+ uint32_t *ptr;
+ uint32_t data[OBJECT_SIZE/4];
+ int i;
+ uint32_t handle;
+ bool tiling_changed;
+ int tile_height;
+
+ igt_skip_on_simulation();
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ if (IS_GEN2(intel_get_drm_devid(fd)))
+ tile_height = 16;
+ else
+ tile_height = 8;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+ ptr = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+
+ /* gtt coherency is done with set_domain in libdrm, don't break that */
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ for (i = 0; i < OBJECT_SIZE/4; i++)
+ ptr[i] = data[i] = i;
+
+ gem_set_tiling(fd, handle, I915_TILING_X, TEST_STRIDE);
+
+ igt_info("testing untiled->tiled\n");
+ tiling_changed = false;
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, 0);
+ /* Too lazy to check for the correct tiling, and impossible anyway on
+ * bit17 swizzling machines. */
+ for (i = 0; i < OBJECT_SIZE/4; i++)
+ if (ptr[i] != data[i])
+ tiling_changed = true;
+ igt_assert(tiling_changed);
+
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ for (i = 0; i < OBJECT_SIZE/4; i++)
+ ptr[i] = data[i] = i;
+
+ gem_set_tiling(fd, handle, I915_TILING_X, TEST_STRIDE*2);
+
+ igt_info("testing tiled->tiled\n");
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, 0);
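+	/* Changing the stride of an X-tiled object reshuffles which dword of
+	 * the original image ends up at each linear offset of the fenced view;
+	 * data_i below recomputes that source index so the readback can be
+	 * compared against what was written.
+	 */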
+ for (i = 0; i < OBJECT_SIZE/4; i++) {
+ int tile_row = i / (TEST_STRIDE * tile_height / 4);
+ int row = i / (TEST_STRIDE * 2 / 4);
+ int half = i & (TEST_STRIDE / 4);
+ int ofs = i % (TEST_STRIDE / 4);
+ int data_i = (tile_row/2)*(TEST_STRIDE * tile_height / 4)
+ + row*TEST_STRIDE/4
+ + half*tile_height + ofs;
+ uint32_t val = data[data_i];
+
+ igt_assert_f(ptr[i] == val,
+ "mismatch at %i, row=%i, half=%i, ofs=%i, "
+ "read: 0x%08x, expected: 0x%08x\n",
+ i, row, half, ofs,
+ ptr[i], val);
+
+ }
+
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ for (i = 0; i < OBJECT_SIZE/4; i++)
+ ptr[i] = data[i] = i;
+
+ gem_set_tiling(fd, handle, I915_TILING_NONE, 0);
+ igt_info("testing tiled->untiled\n");
+ tiling_changed = false;
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, 0);
+ /* Too lazy to check for the correct tiling, and impossible anyway on
+ * bit17 swizzling machines. */
+ for (i = 0; i < OBJECT_SIZE/4; i++)
+ if (ptr[i] != data[i])
+ tiling_changed = true;
+ igt_assert(tiling_changed);
+
+ munmap(ptr, OBJECT_SIZE);
+
+ close(fd);
+}
diff --git a/tests/i915/gem_set_tiling_vs_pwrite.c b/tests/i915/gem_set_tiling_vs_pwrite.c
new file mode 100644
index 00000000..f0126b64
--- /dev/null
+++ b/tests/i915/gem_set_tiling_vs_pwrite.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION("Check set_tiling vs pwrite coherency.");
+
+#define OBJECT_SIZE (1024*1024)
+#define TEST_STRIDE (1024*4)
+
+/**
+ * Testcase: Check set_tiling vs pwrite coherency
+ */
+
+igt_simple_main
+{
+ int fd;
+ uint32_t *ptr;
+ uint32_t data[OBJECT_SIZE/4];
+ int i;
+ uint32_t handle;
+
+ igt_skip_on_simulation();
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ for (i = 0; i < OBJECT_SIZE/4; i++)
+ data[i] = i;
+
+ handle = gem_create(fd, OBJECT_SIZE);
+ ptr = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+
+ gem_set_tiling(fd, handle, I915_TILING_X, TEST_STRIDE);
+
+ /* touch it */
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ *ptr = 0xdeadbeef;
+
+ igt_info("testing pwrite on tiled buffer\n");
+ gem_write(fd, handle, 0, data, OBJECT_SIZE);
+ memset(data, 0, OBJECT_SIZE);
+ gem_read(fd, handle, 0, data, OBJECT_SIZE);
+ for (i = 0; i < OBJECT_SIZE/4; i++)
+ igt_assert_eq_u32(data[i], i);
+
+ /* touch it before changing the tiling, so that the fence sticks around */
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ *ptr = 0xdeadbeef;
+
+ gem_set_tiling(fd, handle, I915_TILING_NONE, 0);
+
+ igt_info("testing pwrite on untiled, but still fenced buffer\n");
+ gem_write(fd, handle, 0, data, OBJECT_SIZE);
+ memset(data, 0, OBJECT_SIZE);
+ gem_read(fd, handle, 0, data, OBJECT_SIZE);
+ for (i = 0; i < OBJECT_SIZE/4; i++)
+ igt_assert_eq_u32(data[i], i);
+
+ munmap(ptr, OBJECT_SIZE);
+
+ close(fd);
+}
diff --git a/tests/i915/gem_shrink.c b/tests/i915/gem_shrink.c
new file mode 100644
index 00000000..c8e05814
--- /dev/null
+++ b/tests/i915/gem_shrink.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/** @file gem_shrink.c
+ *
+ * Exercise the shrinker by overallocating GEM objects
+ */
+
+#include "igt.h"
+#include "igt_gt.h"
+#include "igt_debugfs.h"
+#include "igt_sysfs.h"
+
+#ifndef MADV_FREE
+#define MADV_FREE 8
+#endif
+
+static unsigned int engines[16], nengine;
+
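+/* Create an object and force its backing pages to be allocated by moving it
+ * to the GTT domain, then immediately mark it purgeable so the shrinker may
+ * reclaim those pages; the handle is intentionally left open.
+ */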
+static void get_pages(int fd, uint64_t alloc)
+{
+ uint32_t handle = gem_create(fd, alloc);
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, 0);
+ gem_madvise(fd, handle, I915_MADV_DONTNEED);
+}
+
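+/* pwrite_() and pread_() below hammer a freshly created object with many
+ * small pwrite/pread calls, exercising its backing store through the ioctl
+ * paths, before marking it purgeable.
+ */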
+static void pwrite_(int fd, uint64_t alloc)
+{
+ uint32_t tmp;
+ uint32_t handle = gem_create(fd, alloc);
+ for (int page = 0; page < alloc>>12; page++)
+ gem_write(fd, handle, (page + page % 4095) & ~3, &tmp, 4);
+ gem_madvise(fd, handle, I915_MADV_DONTNEED);
+}
+
+static void pread_(int fd, uint64_t alloc)
+{
+ uint32_t tmp;
+ uint32_t handle = gem_create(fd, alloc);
+ for (int page = 0; page < alloc>>12; page++)
+ gem_read(fd, handle, (page + page % 4095) & ~3, &tmp, 4);
+ gem_madvise(fd, handle, I915_MADV_DONTNEED);
+}
+
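+/* Fault in every page of the object through a GTT mapping (mmap_cpu below
+ * uses a CPU mapping), writing one dword per page, then mark the object
+ * purgeable.
+ */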
+static void mmap_gtt(int fd, uint64_t alloc)
+{
+ uint32_t handle = gem_create(fd, alloc);
+ uint32_t *ptr = gem_mmap__gtt(fd, handle, alloc, PROT_WRITE);
+ for (int page = 0; page < alloc>>12; page++)
+ ptr[page<<10] = 0;
+ munmap(ptr, alloc);
+ gem_madvise(fd, handle, I915_MADV_DONTNEED);
+}
+
+static void mmap_cpu(int fd, uint64_t alloc)
+{
+ uint32_t handle = gem_create(fd, alloc);
+ uint32_t *ptr = gem_mmap__cpu(fd, handle, 0, alloc, PROT_WRITE);
+ for (int page = 0; page < alloc>>12; page++)
+ ptr[page<<10] = 0;
+ munmap(ptr, alloc);
+ gem_madvise(fd, handle, I915_MADV_DONTNEED);
+}
+
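+/* Submit a single batch whose object spans the whole allocation, so execbuf
+ * itself has to pin all of those pages, then mark the batch purgeable.
+ */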
+static void execbuf1(int fd, uint64_t alloc)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+
+ memset(&obj, 0, sizeof(obj));
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+
+ obj.handle = gem_create(fd, alloc);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+ gem_execbuf(fd, &execbuf);
+ gem_madvise(fd, obj.handle, I915_MADV_DONTNEED);
+}
+
+/* Since we want to trigger oom (SIGKILL), we don't want small allocations
+ * to fail and generate a false error (SIGSEGV)! So we redirect allocations
+ * through GEM objects, which should be much more likely to trigger oom. There
+ * are still small allocations within the kernel, so still a small chance of
+ * ENOMEM instead of a full oom.
+ */
+static void *__gem_calloc(int fd, size_t count, size_t size, uint64_t *out_size)
+{
+ uint32_t handle;
+ uint64_t total;
+ void *ptr;
+
+ total = count * size;
+ total = (total + 4095) & -4096;
+
+ handle = gem_create(fd, total);
+ ptr = gem_mmap__cpu(fd, handle, 0, total, PROT_WRITE);
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ gem_close(fd, handle);
+
+ *out_size = total;
+ return ptr;
+}
+
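+/* Grow an execbuf one 1MiB object at a time: each submission adds a fresh
+ * object to the front of the list, so successive calls pin an ever larger
+ * working set, up to the full allocation.
+ */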
+static void execbufN(int fd, uint64_t alloc)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 *obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ int count = alloc >> 20;
+ uint64_t obj_size;
+
+ obj = __gem_calloc(fd, alloc + 1, sizeof(*obj), &obj_size);
+ memset(&execbuf, 0, sizeof(execbuf));
+
+ obj[count].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[count].handle, 0, &bbe, sizeof(bbe));
+
+ for (int i = 1; i <= count; i++) {
+ int j = count - i;
+
+ obj[j].handle = gem_create(fd, 1 << 20);
+ execbuf.buffers_ptr = to_user_pointer(&obj[j]);
+ execbuf.buffer_count = i + 1;
+ gem_execbuf(fd, &execbuf);
+ }
+
+ for (int i = 0; i <= count; i++)
+ gem_madvise(fd, obj[i].handle, I915_MADV_DONTNEED);
+ munmap(obj, obj_size);
+}
+
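+/* As execbufN, but the newest object is marked as a GPU write target and the
+ * submissions are spread across all engines, so the shrinker also has to
+ * deal with dirty, active objects.
+ */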
+static void execbufX(int fd, uint64_t alloc)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 *obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ int count = alloc >> 20;
+ uint64_t obj_size;
+
+ obj = __gem_calloc(fd, alloc + 1, sizeof(*obj), &obj_size);
+ memset(&execbuf, 0, sizeof(execbuf));
+
+ obj[count].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[count].handle, 0, &bbe, sizeof(bbe));
+
+ for (int i = 1; i <= count; i++) {
+ int j = count - i;
+
+ obj[j+1].flags = 0;
+
+ obj[j].handle = gem_create(fd, 1 << 20);
+ obj[j].flags = EXEC_OBJECT_WRITE;
+
+ execbuf.buffers_ptr = to_user_pointer(&obj[j]);
+ execbuf.buffer_count = i + 1;
+ execbuf.flags = engines[j % nengine];
+ gem_execbuf(fd, &execbuf);
+ }
+
+ for (int i = 0; i <= count; i++)
+ gem_madvise(fd, obj[i].handle, I915_MADV_DONTNEED);
+ munmap(obj, obj_size);
+}
+
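+/* As execbufN, but finish by injecting a GPU hang, so the objects are marked
+ * purgeable while the hang is still being resolved.
+ */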
+static void hang(int fd, uint64_t alloc)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 *obj;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ int count = alloc >> 20;
+ uint64_t obj_size;
+
+ obj = __gem_calloc(fd, alloc + 1, sizeof(*obj), &obj_size);
+ memset(&execbuf, 0, sizeof(execbuf));
+
+ obj[count].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[count].handle, 0, &bbe, sizeof(bbe));
+
+ for (int i = 1; i <= count; i++) {
+ int j = count - i;
+
+ obj[j].handle = gem_create(fd, 1 << 20);
+ execbuf.buffers_ptr = to_user_pointer(&obj[j]);
+ execbuf.buffer_count = i + 1;
+ gem_execbuf(fd, &execbuf);
+ }
+
+ gem_close(fd, igt_hang_ring(fd, 0).spin->handle);
+ for (int i = 0; i <= count; i++)
+ gem_madvise(fd, obj[i].handle, I915_MADV_DONTNEED);
+ munmap(obj, obj_size);
+}
+
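+/* Create a userptr object backed by anonymous memory, force its pages to be
+ * acquired via set_domain, then mark the backing memory MADV_FREE so it can
+ * be reclaimed again.
+ */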
+static void userptr(int fd, uint64_t alloc)
+{
+ struct local_i915_gem_userptr userptr;
+ void *ptr;
+
+ igt_assert((alloc & 4095) == 0);
+
+ ptr = mmap(NULL, alloc,
+ PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,
+ -1, 0);
+ igt_assert(ptr != (void *)-1);
+
+ memset(&userptr, 0, sizeof(userptr));
+ userptr.user_size = alloc;
+ userptr.user_ptr = to_user_pointer(ptr);
+ do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr);
+
+ gem_set_domain(fd, userptr.handle, I915_GEM_DOMAIN_GTT, 0);
+
+ madvise(ptr, alloc, MADV_FREE);
+}
+
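+/* Probe for userptr support by issuing the ioctl with a deliberately bogus
+ * address; EFAULT indicates the ioctl itself is available.
+ */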
+static bool has_userptr(void)
+{
+ struct local_i915_gem_userptr userptr;
+ int fd = drm_open_driver(DRIVER_INTEL);
+ int err;
+
+ memset(&userptr, 0, sizeof(userptr));
+ userptr.user_size = 8192;
+ userptr.user_ptr = -4096;
+
+ err = 0;
+ if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr))
+ err = errno;
+
+ close(fd);
+
+ return err == EFAULT;
+}
+
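+/* Background memory hog for the oom mode: map and populate anonymous memory
+ * and never release it, pushing the system towards oom while the main
+ * children run.
+ */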
+static void leak(int fd, uint64_t alloc)
+{
+ char *ptr;
+
+ ptr = mmap(NULL, alloc, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_POPULATE,
+ -1, 0);
+ if (ptr == (char *)-1)
+ return;
+
+ while (alloc) {
+ alloc -= 4096;
+ ptr[alloc] = 0;
+ }
+}
+
+#define SOLO 1
+#define USERPTR 2
+#define OOM 4
+
+static void run_test(int nchildren, uint64_t alloc,
+ void (*func)(int, uint64_t), unsigned flags)
+{
+ const int timeout = flags & SOLO ? 1 : 20;
+
+ /* Each pass consumes alloc bytes and doesn't drop
+ * its reference to object (i.e. calls
+ * gem_madvise(DONTNEED) instead of gem_close()).
+ * After nchildren passes we expect each process
+ * to have enough objects to consume all of memory
+ * if left unchecked.
+ */
+
+ if (flags & SOLO)
+ nchildren = 1;
+
+ /* Background load */
+ if (flags & OOM) {
+ igt_fork(child, nchildren) {
+ igt_until_timeout(timeout) {
+ int fd = drm_open_driver(DRIVER_INTEL);
+ for (int pass = 0; pass < nchildren; pass++)
+ leak(fd, alloc);
+ close(fd);
+ }
+ }
+ }
+
+ if (flags & USERPTR) {
+ igt_require(has_userptr());
+ igt_fork(child, (nchildren + 1)/2) {
+ igt_until_timeout(timeout) {
+ int fd = drm_open_driver(DRIVER_INTEL);
+ for (int pass = 0; pass < nchildren; pass++)
+ userptr(fd, alloc);
+ close(fd);
+ }
+ }
+ nchildren = (nchildren + 1)/2;
+ }
+
+ /* Exercise major ioctls */
+ igt_fork(child, nchildren) {
+ igt_until_timeout(timeout) {
+ int fd = drm_open_driver(DRIVER_INTEL);
+ for (int pass = 0; pass < nchildren; pass++)
+ func(fd, alloc);
+ close(fd);
+ }
+ }
+ igt_waitchildren();
+}
+
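+/* Hammer debugfs i915_drop_caches (bound + unbound) from one child per CPU
+ * while the parent keeps the engine busy with back-to-back spin batches,
+ * checking that reclaim and an active client can make progress together.
+ */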
+static void reclaim(unsigned engine, int timeout)
+{
+ const uint64_t timeout_100ms = 100000000LL;
+ int fd = drm_open_driver(DRIVER_INTEL);
+ int debugfs = igt_debugfs_dir(fd);
+ igt_spin_t *spin;
+ volatile uint32_t *shared;
+
+ shared = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(shared != MAP_FAILED);
+
+ igt_fork(child, sysconf(_SC_NPROCESSORS_ONLN)) {
+ do {
+ igt_sysfs_printf(debugfs, "i915_drop_caches",
+ "%d", DROP_BOUND | DROP_UNBOUND);
+ } while (!*shared);
+ }
+
+ spin = igt_spin_batch_new(fd, .engine = engine);
+ igt_until_timeout(timeout) {
+ igt_spin_t *next = __igt_spin_batch_new(fd, .engine = engine);
+
+ igt_spin_batch_set_timeout(spin, timeout_100ms);
+ gem_sync(fd, spin->handle);
+
+ igt_spin_batch_free(fd, spin);
+ spin = next;
+ }
+ igt_spin_batch_free(fd, spin);
+
+ *shared = 1;
+ igt_waitchildren();
+
+ munmap((void *)shared, 4096);
+ close(debugfs);
+ close(fd);
+}
+
+igt_main
+{
+ const struct test {
+ const char *name;
+ void (*func)(int, uint64_t);
+ } tests[] = {
+ { "get-pages", get_pages },
+ { "pwrite", pwrite_ },
+ { "pread", pread_ },
+ { "mmap-gtt", mmap_gtt },
+ { "mmap-cpu", mmap_cpu },
+ { "execbuf1", execbuf1 },
+ { "execbufN", execbufN },
+ { "execbufX", execbufX },
+ { "hang", hang },
+ { NULL },
+ };
+ const struct mode {
+ const char *suffix;
+ unsigned flags;
+ } modes[] = {
+ { "-sanitycheck", SOLO },
+ { "", 0 },
+ { "-userptr", USERPTR },
+ { "-oom", USERPTR | OOM },
+ { NULL },
+ };
+ uint64_t alloc_size = 0;
+ int num_processes = 0;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ uint64_t mem_size = intel_get_total_ram_mb();
+ unsigned int engine;
+ int fd;
+
+ /* Spawn enough processes to use all memory, but each one only
+ * uses half of the available mappable aperture (~128 MiB).
+ * Individually the processes would be ok, but en masse
+ * we expect the shrinker to start purging objects,
+ * and possibly fail.
+ */
+ alloc_size = gem_mappable_aperture_size() / 2;
+ num_processes = 1 + (mem_size / (alloc_size >> 20));
+
+ igt_info("Using %d processes and %'lluMiB per process\n",
+ num_processes, (long long)(alloc_size >> 20));
+
+ intel_require_memory(num_processes, alloc_size,
+ CHECK_SWAP | CHECK_RAM);
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ nengine = 0;
+ for_each_engine(fd, engine)
+ engines[nengine++] = engine;
+ igt_require(nengine);
+
+ close(fd);
+ }
+
+ igt_subtest("reclaim")
+ reclaim(I915_EXEC_DEFAULT, 2);
+
+ for (const struct test *t = tests; t->name; t++) {
+ for (const struct mode *m = modes; m->suffix; m++) {
+ igt_subtest_f("%s%s", t->name, m->suffix)
+ run_test(num_processes, alloc_size,
+ t->func, m->flags);
+ }
+ }
+}
diff --git a/tests/i915/gem_softpin.c b/tests/i915/gem_softpin.c
new file mode 100644
index 00000000..336008b8
--- /dev/null
+++ b/tests/i915/gem_softpin.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Vinay Belgaumkar <vinay.belgaumkar@intel.com>
+ * Thomas Daniel <thomas.daniel@intel.com>
+ *
+ */
+
+#include "igt.h"
+
+#define EXEC_OBJECT_PINNED (1<<4)
+#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
+
+/* gen8_canonical_addr
+ * Used to convert any address into canonical form, i.e. [63:48] == [47].
+ * Based on kernel's sign_extend64 implementation.
+ * @address - a virtual address
+*/
+#define GEN8_HIGH_ADDRESS_BIT 47
+static uint64_t gen8_canonical_addr(uint64_t address)
+{
+ __u8 shift = 63 - GEN8_HIGH_ADDRESS_BIT;
+ return (__s64)(address << shift) >> shift;
+}
+
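+/* Feed execbuf softpinned placements that must be rejected with -EINVAL:
+ * offsets violating the requested alignment, wrapping offsets, offsets at
+ * the end of the aperture, and non-canonical or >32b addresses used without
+ * the matching flags.
+ */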
+static void test_invalid(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 object;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&object);
+ execbuf.buffer_count = 1;
+
+ memset(&object, 0, sizeof(object));
+ object.handle = gem_create(fd, 2*4096);
+ object.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
+ gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));
+
+ /* Check invalid alignment */
+ object.offset = 4096;
+ object.alignment = 64*1024;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+ object.alignment = 0;
+
+ /* Check wraparound */
+ object.offset = -4096ULL;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+
+ /* Check beyond bounds of aperture */
+ object.offset = gem_aperture_size(fd) - 4096;
+ object.offset = gen8_canonical_addr(object.offset);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+
+ /* Check gen8 canonical addressing */
+ if (gem_aperture_size(fd) > 1ull<<GEN8_HIGH_ADDRESS_BIT) {
+ object.offset = 1ull << GEN8_HIGH_ADDRESS_BIT;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+
+ object.offset = gen8_canonical_addr(object.offset);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), 0);
+ }
+
+ /* Check extended range */
+ if (gem_aperture_size(fd) > 1ull<<32) {
+ object.flags = EXEC_OBJECT_PINNED;
+ object.offset = 1ull<<32;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+
+ object.offset = gen8_canonical_addr(object.offset);
+ object.flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), 0);
+ }
+}
+
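+/* Let the kernel place a 2MiB object, free it to open a hole, then softpin a
+ * 1MiB object at every page offset within that hole, checking each requested
+ * offset is honoured exactly.
+ */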
+static void test_softpin(int fd)
+{
+ const uint32_t size = 1024 * 1024;
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 object;
+ uint64_t offset, end;
+ uint32_t last_handle;
+ int loop;
+
+ last_handle = gem_create(fd, size);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&object);
+ execbuf.buffer_count = 1;
+ for (loop = 0; loop < 1024; loop++) {
+ memset(&object, 0, sizeof(object));
+ object.handle = gem_create(fd, 2*size);
+ gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));
+
+ /* Find a hole */
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, object.handle);
+ gem_close(fd, last_handle);
+
+ igt_debug("Made a 2 MiB hole: %08llx\n",
+ object.offset);
+
+ object.handle = gem_create(fd, size);
+ gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));
+ object.flags |= EXEC_OBJECT_PINNED;
+
+ end = object.offset + size;
+ for (offset = object.offset; offset <= end; offset += 4096) {
+ object.offset = offset;
+ gem_execbuf(fd, &execbuf);
+ igt_assert_eq_u64(object.offset, offset);
+ }
+
+ last_handle = object.handle;
+ }
+}
+
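+/* Reserve a 3MiB hole, pin a 1MiB object in the middle of it, and check that
+ * a second pinned object is accepted immediately below or above it but
+ * rejected with -EINVAL at every page-aligned offset that overlaps it.
+ */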
+static void test_overlap(int fd)
+{
+ const uint32_t size = 1024 * 1024;
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 object[2];
+ uint64_t offset;
+ uint32_t handle;
+
+ handle = gem_create(fd, 3*size);
+ gem_write(fd, handle, 0, &bbe, sizeof(bbe));
+
+ memset(object, 0, sizeof(object));
+ object[0].handle = handle;
+
+ /* Find a hole */
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(object);
+ execbuf.buffer_count = 1;
+ gem_execbuf(fd, &execbuf);
+
+ igt_debug("Made a 3x1 MiB hole: %08llx\n",
+ object[0].offset);
+
+ object[0].handle = gem_create(fd, size);
+ object[0].offset += size;
+ object[0].flags |= EXEC_OBJECT_PINNED;
+ object[1].handle = gem_create(fd, size);
+ object[1].flags |= EXEC_OBJECT_PINNED;
+ gem_write(fd, object[1].handle, 0, &bbe, sizeof(bbe));
+ execbuf.buffer_count = 2;
+
+ /* Check that we fit into our hole */
+ object[1].offset = object[0].offset - size;
+ gem_execbuf(fd, &execbuf);
+ igt_assert_eq_u64(object[1].offset + size, object[0].offset);
+
+ object[1].offset = object[0].offset + size;
+ gem_execbuf(fd, &execbuf);
+ igt_assert_eq_u64(object[1].offset - size, object[0].offset);
+
+ /* Try all possible page-aligned overlaps */
+ for (offset = object[0].offset - size + 4096;
+ offset < object[0].offset + size;
+ offset += 4096) {
+ object[1].offset = offset;
+ igt_debug("[0]=[%08llx - %08llx] [1]=[%08llx - %08llx]\n",
+ (long long)object[0].offset,
+ (long long)object[0].offset + size,
+ (long long)object[1].offset,
+ (long long)object[1].offset + size);
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+ igt_assert_eq_u64(object[1].offset, offset);
+ }
+
+ gem_close(fd, object[1].handle);
+ gem_close(fd, object[0].handle);
+ gem_close(fd, handle);
+}
+
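+/* Pin a 1MiB scratch object and a small batch at the offsets the kernel
+ * chose, fill the batch with a burst of self-copying blits so it stays busy
+ * for a while, and return the offset occupied by that active batch.
+ */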
+static uint64_t busy_batch(int fd)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ const int has_64bit_reloc = gen >= 8;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 object[2];
+ uint32_t *map;
+ int factor = 10;
+ int i = 0;
+
+ memset(object, 0, sizeof(object));
+ object[0].handle = gem_create(fd, 1024*1024);
+ object[1].handle = gem_create(fd, 4096);
+ map = gem_mmap__cpu(fd, object[1].handle, 0, 4096, PROT_WRITE);
+ gem_set_domain(fd, object[1].handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+
+ *map = MI_BATCH_BUFFER_END;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(object);
+ execbuf.buffer_count = 2;
+ if (gen >= 6)
+ execbuf.flags = I915_EXEC_BLT;
+ gem_execbuf(fd, &execbuf);
+
+ igt_debug("Active offsets = [%08llx, %08llx]\n",
+ object[0].offset, object[1].offset);
+
+#define COPY_BLT_CMD (2<<29|0x53<<22|0x6)
+#define BLT_WRITE_ALPHA (1<<21)
+#define BLT_WRITE_RGB (1<<20)
+ gem_set_domain(fd, object[1].handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ while (factor--) {
+ /* XY_SRC_COPY */
+ map[i++] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ if (has_64bit_reloc)
+ map[i-1] += 2;
+ map[i++] = 0xcc << 16 | 1 << 25 | 1 << 24 | (4*1024);
+ map[i++] = 0;
+ map[i++] = 256 << 16 | 1024;
+ map[i++] = object[0].offset;
+ if (has_64bit_reloc)
+ map[i++] = object[0].offset >> 32;
+ map[i++] = 0;
+ map[i++] = 4096;
+ map[i++] = object[0].offset;
+ if (has_64bit_reloc)
+ map[i++] = object[0].offset >> 32;
+ }
+ map[i++] = MI_BATCH_BUFFER_END;
+ munmap(map, 4096);
+
+ object[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+ object[1].flags = EXEC_OBJECT_PINNED;
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, object[0].handle);
+ gem_close(fd, object[1].handle);
+
+ return object[1].offset;
+}
+
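+/* Softpin a new batch at the offset still occupied by the busy batch from
+ * busy_batch(), forcing eviction of an active object, and check the
+ * requested offset was honoured.
+ */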
+static void test_evict_active(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 object;
+ uint64_t expected;
+
+ memset(&object, 0, sizeof(object));
+ object.handle = gem_create(fd, 4096);
+ gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&object);
+ execbuf.buffer_count = 1;
+
+ expected = busy_batch(fd);
+ object.offset = expected;
+ object.flags = EXEC_OBJECT_PINNED;
+
+ /* Replace the active batch with ourselves, forcing an eviction */
+ gem_execbuf(fd, &execbuf);
+ igt_assert_eq_u64(object.offset, expected);
+
+ gem_close(fd, object.handle);
+}
+
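+/* On non-LLC platforms using the global GTT, check that a snooped object is
+ * refused a placement immediately adjacent to an uncached one (which could
+ * hang the GPU), is accepted once a page gap separates them, and is kept
+ * away from the uncached object when its placement is left to the kernel.
+ */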
+static void test_evict_snoop(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 object[2];
+ uint64_t hole;
+
+ igt_require(!gem_has_llc(fd));
+ igt_require(!gem_uses_ppgtt(fd));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(object);
+ execbuf.buffer_count = 1;
+
+ /* Find a hole */
+ memset(object, 0, sizeof(object));
+ object[0].handle = gem_create(fd, 5*4096);
+ gem_write(fd, object[0].handle, 0, &bbe, sizeof(bbe));
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, object[0].handle);
+ hole = object[0].offset + 4096;
+
+ /* Create a snoop + uncached pair */
+ object[0].handle = gem_create(fd, 4096);
+ object[0].flags = EXEC_OBJECT_PINNED;
+ gem_set_caching(fd, object[0].handle, 1);
+ object[1].handle = gem_create(fd, 4096);
+ object[1].flags = EXEC_OBJECT_PINNED;
+ gem_write(fd, object[1].handle, 4096-sizeof(bbe), &bbe, sizeof(bbe));
+ execbuf.buffer_count = 2;
+
+ /* snoop abutting before uncached -> error */
+ object[0].offset = hole;
+ object[1].offset = hole + 4096;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+
+ /* snoop abutting after uncached -> error */
+ object[0].offset = hole + 4096;
+ object[1].offset = hole;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+
+ /* with gap -> okay */
+ object[0].offset = hole + 2*4096;
+ object[1].offset = hole;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), 0);
+
+ /* And we should force the snoop away (or the GPU may hang) */
+ object[0].flags = 0;
+ object[1].offset = hole + 4096;
+ igt_assert_eq(__gem_execbuf(fd, &execbuf), 0);
+ igt_assert(object[0].offset != hole);
+ igt_assert(object[0].offset != hole + 2*4096);
+
+ gem_close(fd, object[0].handle);
+ gem_close(fd, object[1].handle);
+}
+
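+/* As evict-active, but the occupant of the target offset is a hung batch, so
+ * the eviction has to cope with an object that is only released by reset
+ * handling.
+ */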
+static void test_evict_hang(int fd)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 object;
+ igt_hang_t hang;
+ uint64_t expected;
+
+ memset(&object, 0, sizeof(object));
+ object.handle = gem_create(fd, 4096);
+ gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&object);
+ execbuf.buffer_count = 1;
+
+ hang = igt_hang_ctx(fd, 0, 0, 0);
+ expected = hang.spin->obj[1].offset;
+
+ /* Replace the hung batch with ourselves, forcing an eviction */
+ object.offset = expected;
+ object.flags = EXEC_OBJECT_PINNED;
+ gem_execbuf(fd, &execbuf);
+ igt_assert_eq_u64(object.offset, expected);
+
+ igt_post_hang_ring(fd, hang);
+ gem_close(fd, object.handle);
+}
+
+static void xchg_offset(void *array, unsigned i, unsigned j)
+{
+ struct drm_i915_gem_exec_object2 *object = array;
+ uint64_t tmp = object[i].offset;
+ object[i].offset = object[j].offset;
+ object[j].offset = tmp;
+}
+
+enum sleep { NOSLEEP, SUSPEND, HIBERNATE };
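+/* Softpin 256 small objects plus a batch that stores each object's index at
+ * its pinned address without any relocations. Keep shuffling which object
+ * sits at which offset (optionally suspending or hibernating along the way)
+ * and check every object receives the value matching its current offset.
+ */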
+static void test_noreloc(int fd, enum sleep sleep)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ const uint32_t size = 4096;
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 object[257];
+ uint64_t offset;
+ uint32_t handle;
+ uint32_t *batch, *b;
+ int i, loop;
+
+ handle = gem_create(fd, (ARRAY_SIZE(object)+1)*size);
+ gem_write(fd, handle, 0, &bbe, sizeof(bbe));
+
+ memset(object, 0, sizeof(object));
+ object[0].handle = handle;
+
+ /* Find a hole */
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(object);
+ execbuf.buffer_count = 1;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, object[0].handle);
+
+ igt_debug("Made a %dx%d KiB hole: %08llx\n",
+ (int)ARRAY_SIZE(object), size/1024, object[0].offset);
+
+ offset = object[0].offset;
+ for (i = 0; i < ARRAY_SIZE(object) - 1; i++) {
+ object[i].handle = gem_create(fd, size);
+ object[i].offset = offset + i*size;
+ object[i].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+ }
+ object[i].handle = gem_create(fd, 2*size);
+ object[i].offset = offset + i*size;
+ object[i].flags = EXEC_OBJECT_PINNED;
+
+ b = batch = gem_mmap__cpu(fd, object[i].handle, 0, 2*size, PROT_WRITE);
+ gem_set_domain(fd, object[i].handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ for (i = 0; i < ARRAY_SIZE(object) - 1; i++) {
+ *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ *b++ = object[i].offset;
+ *b++ = object[i].offset >> 32;
+ } else if (gen >= 4) {
+ *b++ = 0;
+ *b++ = object[i].offset;
+ } else {
+ b[-1]--;
+ *b++ = object[i].offset;
+ }
+ *b++ = i;
+ }
+ *b++ = MI_BATCH_BUFFER_END;
+ igt_assert(b - batch <= 2*size/sizeof(uint32_t));
+ munmap(batch, 2*size);
+
+ execbuf.buffer_count = ARRAY_SIZE(object);
+ for (loop = 0; loop < 1024; loop++) {
+ igt_permute_array(object, ARRAY_SIZE(object)-1, xchg_offset);
+ gem_execbuf(fd, &execbuf);
+
+ if ((loop & 127) == 0) {
+ switch (sleep) {
+ case NOSLEEP:
+ break;
+ case SUSPEND:
+ igt_system_suspend_autoresume(SUSPEND_STATE_MEM,
+ SUSPEND_TEST_NONE);
+ break;
+ case HIBERNATE:
+ igt_system_suspend_autoresume(SUSPEND_STATE_DISK,
+ SUSPEND_TEST_NONE);
+ break;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(object) - 1; i++) {
+ uint32_t val;
+
+ gem_read(fd, object[i].handle, 0, &val, sizeof(val));
+ igt_assert_eq(val, (object[i].offset - offset)/size);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(object); i++)
+ gem_close(fd, object[i].handle);
+}
+
+igt_main
+{
+ int fd = -1;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver_master(DRIVER_INTEL);
+ igt_require_gem(fd);
+ igt_require(gem_has_softpin(fd));
+ igt_require(gem_can_store_dword(fd, 0));
+ }
+
+ igt_subtest("invalid")
+ test_invalid(fd);
+ igt_subtest("softpin")
+ test_softpin(fd);
+ igt_subtest("overlap")
+ test_overlap(fd);
+
+ igt_subtest("noreloc")
+ test_noreloc(fd, NOSLEEP);
+ igt_subtest("noreloc-interruptible")
+ igt_while_interruptible(true) test_noreloc(fd, NOSLEEP);
+ igt_subtest("noreloc-S3")
+ test_noreloc(fd, SUSPEND);
+ igt_subtest("noreloc-S4")
+ test_noreloc(fd, HIBERNATE);
+
+ for (int signal = 0; signal <= 1; signal++) {
+ igt_subtest_f("evict-active%s", signal ? "-interruptible" : "")
+ igt_while_interruptible(signal) test_evict_active(fd);
+ igt_subtest_f("evict-snoop%s", signal ? "-interruptible" : "")
+ igt_while_interruptible(signal) test_evict_snoop(fd);
+ }
+ igt_subtest("evict-hang")
+ test_evict_hang(fd);
+
+ igt_fixture
+ close(fd);
+}
diff --git a/tests/i915/gem_spin_batch.c b/tests/i915/gem_spin_batch.c
new file mode 100644
index 00000000..52410010
--- /dev/null
+++ b/tests/i915/gem_spin_batch.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+
+#define MAX_ERROR 5 /* % */
+
+#define assert_within_epsilon(x, ref, tolerance) \
+ igt_assert_f(100 * x <= (100 + tolerance) * ref && \
+ 100 * x >= (100 - tolerance) * ref, \
+ "'%s' != '%s' (%lld not within %d%% tolerance of %lld)\n",\
+ #x, #ref, (long long)x, tolerance, (long long)ref)
+
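+/* Submit spin batches back to back, terminating each after roughly 100ms,
+ * and check that the number of completed loops matches the elapsed wall
+ * time to within MAX_ERROR percent.
+ */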
+static void spin(int fd, unsigned int engine, unsigned int timeout_sec)
+{
+ const uint64_t timeout_100ms = 100000000LL;
+ unsigned long loops = 0;
+ igt_spin_t *spin;
+ struct timespec tv = { };
+ struct timespec itv = { };
+ uint64_t elapsed;
+
+ spin = __igt_spin_batch_new(fd, .engine = engine);
+ while ((elapsed = igt_nsec_elapsed(&tv)) >> 30 < timeout_sec) {
+ igt_spin_t *next = __igt_spin_batch_new(fd, .engine = engine);
+
+ igt_spin_batch_set_timeout(spin,
+ timeout_100ms - igt_nsec_elapsed(&itv));
+ gem_sync(fd, spin->handle);
+ igt_debug("loop %lu: interval=%fms (target 100ms), elapsed %fms\n",
+ loops,
+ igt_nsec_elapsed(&itv) * 1e-6,
+ igt_nsec_elapsed(&tv) * 1e-6);
+ memset(&itv, 0, sizeof(itv));
+
+ igt_spin_batch_free(fd, spin);
+ spin = next;
+ loops++;
+ }
+ igt_spin_batch_free(fd, spin);
+
+ igt_info("Completed %ld loops in %lld ns, target %ld\n",
+ loops, (long long)elapsed, (long)(elapsed / timeout_100ms));
+
+ assert_within_epsilon(timeout_100ms * loops, elapsed, MAX_ERROR);
+}
+
+static void spin_exit_handler(int sig)
+{
+ igt_terminate_spin_batches();
+}
+
+static void spin_on_all_engines(int fd, unsigned int timeout_sec)
+{
+ unsigned engine;
+
+ for_each_physical_engine(fd, engine) {
+ igt_fork(child, 1) {
+ igt_install_exit_handler(spin_exit_handler);
+ spin(fd, engine, timeout_sec);
+ }
+ }
+
+ igt_waitchildren();
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ int fd = -1;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ igt_fork_hang_detector(fd);
+ intel_detect_and_clear_missed_interrupts(fd);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_f("basic-%s", e->name) {
+ intel_detect_and_clear_missed_interrupts(fd);
+ spin(fd, e->exec_id, 3);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+ }
+ }
+
+ igt_subtest("spin-each") {
+ intel_detect_and_clear_missed_interrupts(fd);
+ spin_on_all_engines(fd, 3);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+ }
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_stolen.c b/tests/i915/gem_stolen.c
new file mode 100644
index 00000000..1d489976
--- /dev/null
+++ b/tests/i915/gem_stolen.c
@@ -0,0 +1,455 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Ankitprasad Sharma <ankitprasad.r.sharma at intel.com>
+ *
+ */
+
+/** @file gem_stolen.c
+ *
+ * This is a test for the extended gem_create ioctl, that includes allocation
+ * of object from stolen memory.
+ *
+ * The goal is to simply ensure the basics work, and invalid input combinations
+ * are rejected.
+ */
+
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <getopt.h>
+
+#include <drm.h>
+
+#include "ioctl_wrappers.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_io.h"
+#include "intel_chipset.h"
+#include "igt_aux.h"
+#include "drmtest.h"
+#include "drm.h"
+#include "i915_drm.h"
+
+IGT_TEST_DESCRIPTION("This test verifies the exetended gem_create ioctl,"
+ " that includes allocation of obj from stolen region");
+#define CLEAR(s) memset(&s, 0, sizeof(s))
+#define SIZE 1024*1024
+#define DWORD_SIZE 4
+#define DATA 0xdead
+#define LARGE_SIZE 0xffffffff
+#define MAX_OBJECTS 100
+
+static drm_intel_bufmgr *bufmgr;
+static struct intel_batchbuffer *batch;
+
+static void verify_copy_op(drm_intel_bo *src, drm_intel_bo *dest)
+{
+ uint32_t *virt, i, ret;
+ /* Fill the src BO with dwords */
+ ret = drm_intel_gem_bo_map_gtt(src);
+ igt_assert(!ret);
+
+ virt = src->virtual;
+ for (i = 0; i < SIZE/DWORD_SIZE; i++)
+ virt[i] = i;
+
+ intel_copy_bo(batch, dest, src, SIZE);
+
+ ret = drm_intel_gem_bo_map_gtt(dest);
+ igt_assert(!ret);
+
+ virt = dest->virtual;
+ /* verify */
+ for (i = 0; i < SIZE/DWORD_SIZE; i++)
+ igt_assert_eq(virt[i], i);
+
+ drm_intel_bo_unmap(src);
+ drm_intel_bo_unmap(dest);
+}
+
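+/* pwrite a known pattern into a stolen object and read it back through a GTT
+ * mapping (stolen_pread below does the reverse) to confirm the pread/pwrite
+ * paths work on stolen memory.
+ */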
+static void stolen_pwrite(int fd)
+{
+ drm_intel_bo *bo;
+ uint32_t buf[SIZE/DWORD_SIZE];
+ uint32_t handle = 0;
+ uint32_t *virt;
+ int i, ret = 0;
+
+ for (i = 0; i < SIZE/DWORD_SIZE; i++)
+ buf[i] = DATA;
+
+ gem_require_stolen_support(fd);
+
+ handle = gem_create_stolen(fd, SIZE);
+
+ gem_write(fd, handle, 0, buf, SIZE);
+ bo = gem_handle_to_libdrm_bo(bufmgr, fd, "bo", handle);
+
+ ret = drm_intel_gem_bo_map_gtt(bo);
+ igt_assert(!ret);
+
+ virt = bo->virtual;
+
+ for (i = 0; i < SIZE/DWORD_SIZE; i++)
+ igt_assert_eq(virt[i], DATA);
+
+ drm_intel_bo_unmap(bo);
+ drm_intel_bo_unreference(bo);
+ gem_close(fd, handle);
+}
+
+static void stolen_pread(int fd)
+{
+ drm_intel_bo *bo;
+ uint32_t buf[SIZE/DWORD_SIZE];
+ uint32_t handle = 0;
+ uint32_t *virt;
+ int i, ret = 0;
+
+ CLEAR(buf);
+
+ gem_require_stolen_support(fd);
+
+ handle = gem_create_stolen(fd, SIZE);
+
+ bo = gem_handle_to_libdrm_bo(bufmgr, fd, "bo", handle);
+
+ ret = drm_intel_gem_bo_map_gtt(bo);
+ igt_assert(!ret);
+
+ virt = bo->virtual;
+
+ for (i = 0; i < SIZE/DWORD_SIZE; i++)
+ virt[i] = DATA;
+
+ drm_intel_bo_unmap(bo);
+ drm_intel_bo_unreference(bo);
+
+ gem_read(fd, handle, 0, buf, SIZE);
+
+ for (i = 0; i < SIZE/DWORD_SIZE; i++)
+ igt_assert_eq(buf[i], DATA);
+
+ gem_close(fd, handle);
+}
+
+static void copy_test(int fd)
+{
+ drm_intel_bo *src, *dest;
+ uint32_t src_handle = 0, dest_handle = 0;
+
+ gem_require_stolen_support(fd);
+
+ src_handle = gem_create_stolen(fd, SIZE);
+ dest_handle = gem_create_stolen(fd, SIZE);
+
+ src = gem_handle_to_libdrm_bo(bufmgr, fd, "src_bo", src_handle);
+ dest = gem_handle_to_libdrm_bo(bufmgr, fd, "dst_bo", dest_handle);
+
+ igt_assert(src != NULL);
+ igt_assert(dest != NULL);
+
+ verify_copy_op(src, dest);
+
+ drm_intel_bo_unreference(src);
+ drm_intel_bo_unreference(dest);
+ gem_close(fd, src_handle);
+ gem_close(fd, dest_handle);
+}
+
+static void verify_object_clear(int fd)
+{
+ drm_intel_bo *bo;
+ uint32_t handle = 0;
+ uint32_t *virt;
+ int i, ret;
+
+ gem_require_stolen_support(fd);
+
+ handle = gem_create_stolen(fd, SIZE);
+
+ bo = gem_handle_to_libdrm_bo(bufmgr, fd, "verify_bo", handle);
+ igt_assert(bo != NULL);
+
+ ret = drm_intel_gem_bo_map_gtt(bo);
+ igt_assert(!ret);
+
+ /* Verify if the BO is zeroed */
+ virt = bo->virtual;
+ for (i = 0; i < SIZE / DWORD_SIZE; i++)
+ igt_assert(!virt[i]);
+
+ drm_intel_bo_unmap(bo);
+ drm_intel_bo_unreference(bo);
+ gem_close(fd, handle);
+}
+
+static void stolen_large_obj_alloc(int fd)
+{
+ uint32_t handle = 0;
+
+ gem_require_stolen_support(fd);
+ handle = __gem_create_stolen(fd, (unsigned long long) LARGE_SIZE + 4096);
+ igt_assert(!handle);
+}
+
+static void stolen_fill_purge_test(int fd)
+{
+ drm_intel_bo *bo;
+ int obj_count = 0, i = 0;
+ int _ret = 0, j = 0;
+ uint32_t handle[MAX_OBJECTS];
+ uint32_t new_handle;
+ uint32_t *virt;
+ int retained;
+
+ gem_require_stolen_support(fd);
+
+ /* Exhaust Stolen space */
+ do {
+ handle[i] = __gem_create_stolen(fd, SIZE);
+ if (handle[i] != 0) {
+ bo = gem_handle_to_libdrm_bo(bufmgr, fd,
+ "verify_bo", handle[i]);
+ igt_assert(bo != NULL);
+
+ _ret = drm_intel_gem_bo_map_gtt(bo);
+ igt_assert(!_ret);
+
+ virt = bo->virtual;
+ for (j = 0; j < SIZE/DWORD_SIZE; j++)
+ virt[j] = DATA;
+
+ drm_intel_bo_unmap(bo);
+ drm_intel_bo_unreference(bo);
+
+ obj_count++;
+ }
+
+ i++;
+ } while (handle[i-1] && i < MAX_OBJECTS);
+
+ igt_assert(obj_count > 0);
+
+ /* Mark all stolen objects purgeable */
+ for (i = 0; i < obj_count; i++)
+ retained = gem_madvise(fd, handle[i], I915_MADV_DONTNEED);
+
+ /* Try to allocate one more object */
+ new_handle = gem_create_stolen(fd, SIZE);
+
+ /* Check if the retained object's memory contents are intact */
+ for (i = 0; i < obj_count; i++) {
+ retained = gem_madvise(fd, handle[i], I915_MADV_WILLNEED);
+ if (retained) {
+ bo = gem_handle_to_libdrm_bo(bufmgr, fd,
+ "verify_bo", handle[i]);
+ igt_assert(bo != NULL);
+
+ _ret = drm_intel_gem_bo_map_gtt(bo);
+ igt_assert(!_ret);
+
+ virt = bo->virtual;
+ for (j = 0; j < SIZE/DWORD_SIZE; j++)
+ igt_assert_eq(virt[j], DATA);
+
+ drm_intel_bo_unmap(bo);
+ drm_intel_bo_unreference(bo);
+ }
+ }
+
+ gem_close(fd, new_handle);
+ for (i = 0; i < obj_count; i++)
+ gem_close(fd, handle[i]);
+}
+
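+/* Fill stolen memory with zeroed objects, blit a known pattern into each of
+ * them, hibernate, and then verify that the contents survived the
+ * suspend-to-disk cycle.
+ */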
+static void stolen_hibernate(int fd)
+{
+ drm_intel_bo *bo;
+ drm_intel_bo *src, *dest;
+ int obj_count = 0, i = 0;
+ int ret, j;
+ uint32_t handle[MAX_OBJECTS], src_handle;
+ uint32_t *virt;
+
+ gem_require_stolen_support(fd);
+
+ src_handle = gem_create(fd, SIZE);
+ src = gem_handle_to_libdrm_bo(bufmgr, fd,
+ "bo", src_handle);
+ igt_assert(src != NULL);
+
+ ret = drm_intel_gem_bo_map_gtt(src);
+ igt_assert_eq(ret, 0);
+
+ virt = src->virtual;
+ for (j = 0; j < SIZE/DWORD_SIZE; j++) {
+ igt_assert_eq(virt[j], 0);
+ virt[j] = j;
+ }
+
+ drm_intel_bo_unmap(src);
+ /* Exhaust Stolen space */
+ for (i = 0; i < MAX_OBJECTS; i++) {
+ handle[i] = __gem_create_stolen(fd, SIZE);
+ if (!handle[i])
+ break;
+
+ bo = gem_handle_to_libdrm_bo(bufmgr, fd,
+ "verify_bo", handle[i]);
+ igt_assert(bo != NULL);
+ ret = drm_intel_gem_bo_map_gtt(bo);
+ igt_assert_eq(ret, 0);
+
+ virt = bo->virtual;
+ for (j = 0; j < SIZE/DWORD_SIZE; j++)
+ igt_assert_eq(virt[j], 0);
+
+ drm_intel_bo_unmap(bo);
+ drm_intel_bo_unreference(bo);
+
+ obj_count++;
+ }
+
+ /* Assert that at least one object was allocated from stolen; that
+ * is good enough to verify content preservation across
+ * hibernation.
+ */
+ igt_assert(obj_count > 0);
+
+ /* Copy data to all stolen backed objects */
+ for (i = 0; i < obj_count; i++) {
+ dest = gem_handle_to_libdrm_bo(bufmgr, fd,
+ "dst_bo", handle[i]);
+ igt_assert(dest != NULL);
+ /* Copy contents to stolen backed objects via blt and
+ * verify post-hibernation, this also helps in identifying
+ * that the operation was completed before going to
+ * hibernation.
+ */
+ intel_copy_bo(batch, dest, src, SIZE);
+ }
+
+ drm_intel_bo_unreference(src);
+
+ igt_system_suspend_autoresume(SUSPEND_STATE_DISK, SUSPEND_TEST_NONE);
+ /* Check if the object's memory contents are intact
+ * across hibernation.
+ */
+ for (i = 0; i < obj_count; i++) {
+ bo = gem_handle_to_libdrm_bo(bufmgr, fd,
+ "verify_bo", handle[i]);
+ igt_assert(bo != NULL);
+ ret = drm_intel_gem_bo_map_gtt(bo);
+ igt_assert_eq(ret, 0);
+ virt = bo->virtual;
+ for (j = 0; j < SIZE/DWORD_SIZE; j++)
+ igt_assert_eq(virt[j], j);
+
+ drm_intel_bo_unmap(bo);
+ drm_intel_bo_unreference(bo);
+ }
+
+ gem_close(fd, src_handle);
+ for (i = 0; i < obj_count; i++)
+ gem_close(fd, handle[i]);
+}
+
+static void
+stolen_no_mmap(int fd)
+{
+ void *addr;
+ uint32_t handle = 0;
+
+ gem_require_stolen_support(fd);
+
+ handle = gem_create_stolen(fd, SIZE);
+
+ addr = __gem_mmap__cpu(fd, handle, 0, SIZE, PROT_READ | PROT_WRITE);
+ igt_assert(addr == NULL);
+
+ gem_close(fd, handle);
+}
+
+igt_main
+{
+ int fd;
+ uint32_t devid;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ devid = intel_get_drm_devid(fd);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+ }
+
+ igt_subtest("stolen-clear")
+ verify_object_clear(fd);
+
+ /*
+ * stolen memory special case - stolen objects are not CPU mappable
+ */
+ igt_subtest("stolen-no-mmap")
+ stolen_no_mmap(fd);
+
+ /* checking for pread/pwrite interfaces */
+ igt_subtest("stolen-pwrite")
+ stolen_pwrite(fd);
+
+ igt_subtest("stolen-pread")
+ stolen_pread(fd);
+
+ /* Functional Test - blt copy */
+ igt_subtest("stolen-copy")
+ copy_test(fd);
+
+ igt_subtest("large-object-alloc")
+ stolen_large_obj_alloc(fd);
+
+ /* Fill stolen memory completely and mark all the objects
+ * purgeable. Then try to add one more object to verify
+ * the purging logic.
+ * Finally mark all objects WILLNEED again and verify the
+ * contents of the retained objects.
+ */
+ igt_subtest("stolen-fill-purge")
+ stolen_fill_purge_test(fd);
+
+ igt_subtest("stolen-hibernate")
+ stolen_hibernate(fd);
+
+ igt_fixture {
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+ }
+}
diff --git a/tests/i915/gem_storedw_batches_loop.c b/tests/i915/gem_storedw_batches_loop.c
new file mode 100644
index 00000000..c0343ccb
--- /dev/null
+++ b/tests/i915/gem_storedw_batches_loop.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Jesse Barnes <jbarnes@virtuousgeek.org> (based on gem_bad_blit.c)
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+static drm_intel_bufmgr *bufmgr;
+static drm_intel_bo *target_bo;
+static int has_ppgtt = 0;
+
+#define SECURE_DISPATCH (1<<0)
+
+/* Like the store dword test, but we create new command buffers each time */
+static void
+store_dword_loop(int divider, unsigned flags)
+{
+ int cmd, i, val = 0;
+ uint32_t *buf;
+ drm_intel_bo *cmd_bo;
+
+ igt_info("running storedw loop with stall every %i batch\n", divider);
+
+ cmd = MI_STORE_DWORD_IMM;
+ if (!has_ppgtt)
+ cmd |= MI_MEM_VIRTUAL;
+
+ for (i = 0; i < SLOW_QUICK(0x2000, 4); i++) {
+ int j = 0;
+ int cmd_address_offset;
+ cmd_bo = drm_intel_bo_alloc(bufmgr, "cmd bo", 4096, 4096);
+ igt_assert(cmd_bo);
+
+ /* Upload through cpu mmaps to make sure we don't have a gtt
+ * mapping which could paper over secure batch submission
+ * failing to bind it. */
+ drm_intel_bo_map(cmd_bo, 1);
+ buf = cmd_bo->virtual;
+
+ buf[j++] = cmd;
+ if (intel_gen(drm_intel_bufmgr_gem_get_devid(bufmgr)) >= 8) {
+ cmd_address_offset = j * 4;
+ buf[j++] = target_bo->offset;
+ buf[j++] = 0;
+ } else {
+ buf[j++] = 0;
+ cmd_address_offset = j * 4;
+ buf[j++] = target_bo->offset;
+ }
+ igt_assert_lt(0, j);
+ buf[j++] = 0x42000000 + val;
+
+ igt_assert(drm_intel_bo_references(cmd_bo, target_bo) == 0);
+
+ igt_assert(drm_intel_bo_emit_reloc(cmd_bo, cmd_address_offset, target_bo, 0,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ I915_GEM_DOMAIN_INSTRUCTION) == 0);
+ buf[j++] = MI_BATCH_BUFFER_END;
+ buf[j++] = MI_BATCH_BUFFER_END;
+
+ drm_intel_bo_unmap(cmd_bo);
+
+ igt_assert(drm_intel_bo_references(cmd_bo, target_bo) == 1);
+
+#define LOCAL_I915_EXEC_SECURE (1<<9)
+ igt_assert(drm_intel_bo_mrb_exec(cmd_bo, j * 4, NULL, 0, 0,
+ I915_EXEC_BLT |
+ (flags & SECURE_DISPATCH ? LOCAL_I915_EXEC_SECURE : 0))
+ == 0);
+
+ if (i % divider != 0)
+ goto cont;
+
+ drm_intel_bo_wait_rendering(cmd_bo);
+
+ drm_intel_bo_map(target_bo, 1);
+
+ buf = target_bo->virtual;
+ igt_assert_f(buf[0] == (0x42000000 | val),
+ "value mismatch: cur 0x%08x, stored 0x%08x\n",
+ buf[0], 0x42000000 | val);
+
+ buf[0] = 0; /* let batch write it again */
+ drm_intel_bo_unmap(target_bo);
+
+cont:
+ drm_intel_bo_unreference(cmd_bo);
+
+ val++;
+ }
+
+ igt_info("completed %d writes successfully\n", i);
+}
+
+int fd;
+int devid;
+
+igt_main
+{
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ devid = intel_get_drm_devid(fd);
+
+ has_ppgtt = gem_uses_ppgtt(fd);
+
+ /* storedw needs a GTT address on gen4+/g33 and snoopable memory on
+ * older parts. Strictly speaking we could implement support for that now ... */
+ igt_require(intel_gen(devid) >= 6);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ igt_assert(bufmgr);
+
+ // drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+ target_bo = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
+ igt_assert(target_bo);
+ }
+
+ igt_subtest("normal") {
+ store_dword_loop(1, 0);
+ store_dword_loop(2, 0);
+ store_dword_loop(3, 0);
+ store_dword_loop(5, 0);
+ }
+
+ igt_subtest("secure-dispatch") {
+ store_dword_loop(1, SECURE_DISPATCH);
+ store_dword_loop(2, SECURE_DISPATCH);
+ store_dword_loop(3, SECURE_DISPATCH);
+ store_dword_loop(5, SECURE_DISPATCH);
+ }
+
+ igt_subtest("cached-mapping") {
+ gem_set_caching(fd, target_bo->handle, 1);
+ store_dword_loop(1, 0);
+ store_dword_loop(2, 0);
+ store_dword_loop(3, 0);
+ store_dword_loop(5, 0);
+ }
+
+ igt_subtest("uncached-mapping") {
+ gem_set_caching(fd, target_bo->handle, 0);
+ store_dword_loop(1, 0);
+ store_dword_loop(2, 0);
+ store_dword_loop(3, 0);
+ store_dword_loop(5, 0);
+ }
+
+ igt_fixture {
+ drm_intel_bo_unreference(target_bo);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_storedw_loop.c b/tests/i915/gem_storedw_loop.c
new file mode 100644
index 00000000..b00555e0
--- /dev/null
+++ b/tests/i915/gem_storedw_loop.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Jesse Barnes <jbarnes@virtuousgeek.org> (based on gem_bad_blit.c)
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION("Basic CS check using MI_STORE_DATA_IMM.");
+
+#define LOCAL_I915_EXEC_VEBOX (4<<0)
+
+static int devid;
+
+/*
+ * Testcase: Basic CS check using MI_STORE_DATA_IMM on each engine
+ */
+
+static unsigned coherent_domain;
+
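+/* Map the object through whichever path is coherent with the GPU on this
+ * platform: a CPU mmap on LLC, a WC mmap when available, and a GTT mmap
+ * otherwise.
+ */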
+static void *
+mmap_coherent(int fd, uint32_t handle, int size)
+{
+ if (gem_has_llc(fd)) {
+ coherent_domain = I915_GEM_DOMAIN_CPU;
+ return gem_mmap__cpu(fd, handle, 0, size, PROT_WRITE);
+ } else if (gem_mmap__has_wc(fd)) {
+ coherent_domain = I915_GEM_DOMAIN_WC;
+ return gem_mmap__wc(fd, handle, 0, size, PROT_WRITE);
+ } else {
+ coherent_domain = I915_GEM_DOMAIN_GTT;
+ return gem_mmap__gtt(fd, handle, size, PROT_WRITE);
+ }
+}
+
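+/* Build `divider' small batches, each storing a value into the shared target
+ * via MI_STORE_DWORD_IMM, and cycle through them; every time the first batch
+ * comes around again, stall and check that the most recent write landed.
+ */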
+static void
+store_dword_loop(int fd, int ring, int divider)
+{
+ int i, val = 0;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc[divider];
+ uint32_t handle[divider];
+ uint32_t *batch[divider];
+ uint32_t *target;
+ int gen = intel_gen(devid);
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = gem_create(fd, 4096);
+ target = mmap_coherent(fd, obj[0].handle, 4096);
+
+ memset(reloc, 0, sizeof(reloc));
+ for (i = 0; i < divider; i++) {
+ uint32_t *b;
+
+ handle[i] = gem_create(fd, 4096);
+ batch[i] = mmap_coherent(fd, handle[i], 4096);
+ gem_set_domain(fd, handle[i], coherent_domain, coherent_domain);
+
+ b = batch[i];
+ *b++ = MI_STORE_DWORD_IMM;
+ *b++ = 0;
+ *b++ = 0;
+ *b++ = 0;
+ *b++ = MI_BATCH_BUFFER_END;
+
+ reloc[i].target_handle = obj[0].handle;
+ reloc[i].offset = 4;
+ if (gen < 8)
+ reloc[i].offset += 4;
+ reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+ obj[1].relocation_count = 1;
+ }
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.flags = ring;
+
+ igt_info("running storedw loop on render with stall every %i batch\n", divider);
+
+ for (i = 0; i < SLOW_QUICK(0x2000, 0x10); i++) {
+ int j = i % divider;
+
+ gem_set_domain(fd, handle[j], coherent_domain, coherent_domain);
+ batch[j][3] = val;
+ obj[1].handle = handle[j];
+ obj[1].relocs_ptr = to_user_pointer(&reloc[j]);
+ gem_execbuf(fd, &execbuf);
+
+ if (j == 0) {
+ gem_set_domain(fd, obj[0].handle, coherent_domain, 0);
+ igt_assert_f(*target == val,
+ "%d: value mismatch: stored 0x%08x, expected 0x%08x\n",
+ i, *target, val);
+ }
+
+ val++;
+ }
+
+ gem_set_domain(fd, obj[0].handle, coherent_domain, 0);
+ igt_info("completed %d writes successfully, current value: 0x%08x\n",
+ i, target[0]);
+
+ munmap(target, 4096);
+ gem_close(fd, obj[0].handle);
+ for (i = 0; i < divider; ++i) {
+ munmap(batch[i], 4096);
+ gem_close(fd, handle[i]);
+ }
+}
+
+static void
+store_test(int fd, int ring)
+{
+ gem_require_ring(fd, ring);
+ store_dword_loop(fd, ring, 1);
+ store_dword_loop(fd, ring, 2);
+ if (!igt_run_in_simulation()) {
+ store_dword_loop(fd, ring, 3);
+ store_dword_loop(fd, ring, 5);
+ store_dword_loop(fd, ring, 7);
+ store_dword_loop(fd, ring, 11);
+ store_dword_loop(fd, ring, 13);
+ store_dword_loop(fd, ring, 17);
+ store_dword_loop(fd, ring, 19);
+ }
+}
+
+static void
+check_test_requirements(int fd, int ringid)
+{
+ gem_require_ring(fd, ringid);
+ igt_require(gem_can_store_dword(fd, ringid));
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ int fd;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ devid = intel_get_drm_devid(fd);
+
+ igt_skip_on_f(intel_gen(devid) < 6,
+ "MI_STORE_DATA can only use GTT address on gen4+/g33 and "
+ "needs snoopable mem on pre-gen6\n");
+
+ /* This only works with ppgtt */
+ igt_require(gem_uses_ppgtt(fd));
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_f("store-%s", e->name) {
+ check_test_requirements(fd, e->exec_id);
+ store_test(fd, e->exec_id | e->flags);
+ }
+ }
+
+ igt_fixture {
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_streaming_writes.c b/tests/i915/gem_streaming_writes.c
new file mode 100644
index 00000000..e83d69de
--- /dev/null
+++ b/tests/i915/gem_streaming_writes.c
@@ -0,0 +1,405 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+#define OBJECT_SIZE 1024*1024
+#define CHUNK_SIZE 32
+
+#define COPY_BLT_CMD (2<<29|0x53<<22|0x6)
+#define BLT_WRITE_ALPHA (1<<21)
+#define BLT_WRITE_RGB (1<<20)
+#define BLT_WRITE_ARGB (BLT_WRITE_ALPHA | BLT_WRITE_RGB)
+
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+IGT_TEST_DESCRIPTION("Test of streaming writes into active GPU sources");
+
+#define SRC 0
+#define DST 1
+#define BATCH 2
+
+#define src exec[SRC].handle
+#define src_offset exec[SRC].offset
+#define dst exec[DST].handle
+#define dst_offset exec[DST].offset
+
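+/* Stream a pattern into the source through a CPU, GTT or WC mapping while
+ * the GPU continuously copies it into the destination using tiny per-chunk
+ * blits selected via batch_start_offset, optionally re-synchronising the
+ * source domain before each chunk, and verify the destination after every
+ * pass.
+ */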
+static void test_streaming(int fd, int mode, int sync)
+{
+ const int has_64bit_reloc = intel_gen(intel_get_drm_devid(fd)) >= 8;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 exec[3];
+ struct drm_i915_gem_relocation_entry reloc[128];
+ uint32_t tmp[] = { MI_BATCH_BUFFER_END };
+ uint64_t __src_offset, __dst_offset;
+ uint32_t *s, *d;
+ uint32_t offset;
+ struct {
+ uint32_t handle;
+ uint64_t offset;
+ } *batch;
+ int i, n;
+
+ memset(exec, 0, sizeof(exec));
+ exec[SRC].handle = gem_create(fd, OBJECT_SIZE);
+ exec[DST].handle = gem_create(fd, OBJECT_SIZE);
+
+ switch (mode) {
+ case 0: /* cpu/snoop */
+ gem_set_caching(fd, src, I915_CACHING_CACHED);
+ s = gem_mmap__cpu(fd, src, 0, OBJECT_SIZE,
+ PROT_READ | PROT_WRITE);
+ break;
+ case 1: /* gtt */
+ s = gem_mmap__gtt(fd, src, OBJECT_SIZE,
+ PROT_READ | PROT_WRITE);
+ break;
+ case 2: /* wc */
+ s = gem_mmap__wc(fd, src, 0, OBJECT_SIZE,
+ PROT_READ | PROT_WRITE);
+ break;
+ }
+ *s = 0; /* fault the object into the mappable range first (for GTT) */
+
+ d = gem_mmap__cpu(fd, dst, 0, OBJECT_SIZE, PROT_READ);
+
+ gem_write(fd, dst, 0, tmp, sizeof(tmp));
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(exec);
+ execbuf.buffer_count = 2;
+ execbuf.flags = LOCAL_I915_EXEC_HANDLE_LUT;
+ if (__gem_execbuf(fd, &execbuf)) {
+ execbuf.flags = 0;
+ igt_require(__gem_execbuf(fd, &execbuf) == 0);
+ }
+ /* We assume that the active objects are fixed to avoid relocations */
+ __src_offset = src_offset;
+ __dst_offset = dst_offset;
+
+ memset(reloc, 0, sizeof(reloc));
+ for (i = 0; i < 64; i++) {
+ reloc[2*i+0].offset = 64*i + 4 * sizeof(uint32_t);
+ reloc[2*i+0].delta = 0;
+ reloc[2*i+0].target_handle = execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT ? DST : dst;
+ reloc[2*i+0].presumed_offset = dst_offset;
+ reloc[2*i+0].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[2*i+0].write_domain = I915_GEM_DOMAIN_RENDER;
+
+ reloc[2*i+1].offset = 64*i + 7 * sizeof(uint32_t);
+ if (has_64bit_reloc)
+ reloc[2*i+1].offset += sizeof(uint32_t);
+ reloc[2*i+1].delta = 0;
+ reloc[2*i+1].target_handle = execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT ? SRC : src;
+ reloc[2*i+1].presumed_offset = src_offset;
+ reloc[2*i+1].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[2*i+1].write_domain = 0;
+ }
+ gem_execbuf(fd, &execbuf);
+ igt_assert_eq_u64(__src_offset, src_offset);
+ igt_assert_eq_u64(__dst_offset, dst_offset);
+
+ exec[DST].flags = EXEC_OBJECT_WRITE;
+ exec[BATCH].relocation_count = 2;
+ execbuf.buffer_count = 3;
+ execbuf.flags |= I915_EXEC_NO_RELOC;
+ if (gem_has_blt(fd))
+ execbuf.flags |= I915_EXEC_BLT;
+
+ batch = malloc(sizeof(*batch) * (OBJECT_SIZE / CHUNK_SIZE / 64));
+ for (i = n = 0; i < OBJECT_SIZE / CHUNK_SIZE / 64; i++) {
+ uint32_t *base;
+
+ batch[i].handle = gem_create(fd, 4096);
+ batch[i].offset = 0;
+
+ base = gem_mmap__cpu(fd, batch[i].handle, 0, 4096, PROT_WRITE);
+ gem_set_domain(fd, batch[i].handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+
+ for (int j = 0; j < 64; j++) {
+ unsigned x = (n * CHUNK_SIZE) % 4096 >> 2;
+ unsigned y = (n * CHUNK_SIZE) / 4096;
+ uint32_t *b = base + 16 * j;
+ int k = 0;
+
+ b[k] = COPY_BLT_CMD | BLT_WRITE_ARGB;
+ if (has_64bit_reloc)
+ b[k] += 2;
+ k++;
+ b[k++] = 0xcc << 16 | 1 << 25 | 1 << 24 | 4096;
+ b[k++] = (y << 16) | x;
+ b[k++] = ((y+1) << 16) | (x + (CHUNK_SIZE >> 2));
+ b[k++] = dst_offset;
+ if (has_64bit_reloc)
+ b[k++] = dst_offset >> 32;
+ b[k++] = (y << 16) | x;
+ b[k++] = 4096;
+ b[k++] = src_offset;
+ if (has_64bit_reloc)
+ b[k++] = src_offset >> 32;
+ b[k++] = MI_BATCH_BUFFER_END;
+
+ n++;
+ }
+
+ munmap(base, 4096);
+ }
+
+ for (int pass = 0; pass < 256; pass++) {
+ int domain = mode ? I915_GEM_DOMAIN_GTT : I915_GEM_DOMAIN_CPU;
+ gem_set_domain(fd, src, domain, domain);
+
+ if (pass == 0) {
+ for (i = 0; i < OBJECT_SIZE/4; i++)
+ s[i] = i;
+ }
+
+ /* Now copy from the src to the dst in 32byte chunks */
+ for (offset = 0; offset < OBJECT_SIZE; offset += CHUNK_SIZE) {
+ int b;
+
+ if (pass) {
+ if (sync)
+ gem_set_domain(fd, src, domain, domain);
+ for (i = 0; i < CHUNK_SIZE/4; i++)
+ s[offset/4 + i] = (OBJECT_SIZE*pass + offset)/4 + i;
+ }
+
+ igt_assert(exec[DST].flags & EXEC_OBJECT_WRITE);
+
+ b = offset / CHUNK_SIZE / 64;
+ n = offset / CHUNK_SIZE % 64;
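+			/* Each 4KiB batch object holds 64 blits of 64 bytes:
+			 * pick batch b, blit n within it, and the matching
+			 * pair of dst/src relocations for this chunk.
+			 */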
+ exec[BATCH].relocs_ptr = to_user_pointer((reloc + 2*n));
+ exec[BATCH].handle = batch[b].handle;
+ exec[BATCH].offset = batch[b].offset;
+ execbuf.batch_start_offset = 64*n;
+
+ gem_execbuf(fd, &execbuf);
+ igt_assert_eq_u64(__src_offset, src_offset);
+ igt_assert_eq_u64(__dst_offset, dst_offset);
+
+ batch[b].offset = exec[BATCH].offset;
+ }
+
+ gem_set_domain(fd, dst, I915_GEM_DOMAIN_CPU, 0);
+ for (offset = 0; offset < OBJECT_SIZE/4; offset++)
+ igt_assert_eq(pass*OBJECT_SIZE/4 + offset, d[offset]);
+ }
+
+ for (i = 0; i < OBJECT_SIZE / CHUNK_SIZE / 64; i++)
+ gem_close(fd, batch[i].handle);
+ free(batch);
+
+ munmap(s, OBJECT_SIZE);
+ gem_close(fd, src);
+ munmap(d, OBJECT_SIZE);
+ gem_close(fd, dst);
+}
+
+static void test_batch(int fd, int mode, int reverse)
+{
+ const int has_64bit_reloc = intel_gen(intel_get_drm_devid(fd)) >= 8;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 exec[3];
+ struct drm_i915_gem_relocation_entry reloc[2];
+ uint32_t tmp[] = { MI_BATCH_BUFFER_END };
+ uint64_t __src_offset, __dst_offset;
+ bool need_64b_start_offset = true;
+ uint64_t batch_size;
+ uint32_t *s, *d;
+ uint32_t *base;
+ uint32_t offset;
+
+ memset(exec, 0, sizeof(exec));
+ exec[DST].handle = gem_create(fd, OBJECT_SIZE);
+ exec[SRC].handle = gem_create(fd, OBJECT_SIZE);
+
+ s = gem_mmap__wc(fd, src, 0, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+
+ d = gem_mmap__cpu(fd, dst, 0, OBJECT_SIZE, PROT_READ);
+
+ memset(reloc, 0, sizeof(reloc));
+ reloc[0].offset = 4 * sizeof(uint32_t);
+ reloc[0].delta = 0;
+ reloc[0].target_handle = execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT ? DST : dst;
+ reloc[0].presumed_offset = dst_offset;
+ reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+
+ reloc[1].offset = 7 * sizeof(uint32_t);
+ if (has_64bit_reloc)
+ reloc[1].offset += sizeof(uint32_t);
+ reloc[1].delta = 0;
+ reloc[1].target_handle = execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT ? SRC : src;
+ reloc[1].presumed_offset = src_offset;
+ reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[1].write_domain = 0;
+
+ batch_size = ALIGN(OBJECT_SIZE / CHUNK_SIZE * 128, 4096);
+ exec[BATCH].relocs_ptr = to_user_pointer(reloc);
+ exec[BATCH].relocation_count = 2;
+ exec[BATCH].handle = gem_create(fd, batch_size);
+
+ switch (mode) {
+ case 0: /* cpu/snoop */
+ igt_require(gem_has_llc(fd));
+ base = gem_mmap__cpu(fd, exec[BATCH].handle, 0, batch_size,
+ PROT_READ | PROT_WRITE);
+ break;
+ case 1: /* gtt */
+ base = gem_mmap__gtt(fd, exec[BATCH].handle, batch_size,
+ PROT_READ | PROT_WRITE);
+ break;
+ case 2: /* wc */
+ base = gem_mmap__wc(fd, exec[BATCH].handle, 0, batch_size,
+ PROT_READ | PROT_WRITE);
+ break;
+ }
+ *base = 0; /* fault the object into the mappable range first */
+
+ gem_write(fd, exec[BATCH].handle, 0, tmp, sizeof(tmp));
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(exec);
+ execbuf.buffer_count = 3;
+ execbuf.flags = LOCAL_I915_EXEC_HANDLE_LUT;
+ if (gem_has_blt(fd))
+ execbuf.flags |= I915_EXEC_BLT;
+ if (__gem_execbuf(fd, &execbuf)) {
+ execbuf.flags &= ~LOCAL_I915_EXEC_HANDLE_LUT;
+ gem_execbuf(fd, &execbuf);
+ }
+ execbuf.flags |= I915_EXEC_NO_RELOC;
+ exec[DST].flags = EXEC_OBJECT_WRITE;
+ /* We assume that the active objects are fixed to avoid relocations */
+ exec[BATCH].relocation_count = 0;
+ __src_offset = src_offset;
+ __dst_offset = dst_offset;
+
+ offset = mode ? I915_GEM_DOMAIN_GTT : I915_GEM_DOMAIN_CPU;
+ gem_set_domain(fd, exec[BATCH].handle, offset, offset);
+ for (int pass = 0; pass < 256; pass++) {
+ gem_set_domain(fd, src, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ for (offset = 0; offset < OBJECT_SIZE/4; offset++)
+ s[offset] = OBJECT_SIZE*pass/4 + offset;
+
+ /* Now copy from the src to the dst in 32byte chunks */
+ for (offset = 0; offset < OBJECT_SIZE / CHUNK_SIZE; offset++) {
+ unsigned x = (offset * CHUNK_SIZE) % 4096 >> 2;
+ unsigned y = (offset * CHUNK_SIZE) / 4096;
+ int k;
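+			/* Each blit gets its own 128-byte slot in the batch;
+			 * in reverse mode the slots are consumed from the end
+			 * of the batch back towards the start.
+			 */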
+
+ execbuf.batch_start_offset = 128 * offset;
+ if (!need_64b_start_offset)
+ execbuf.batch_start_offset += 8 * (pass & 7);
+ igt_assert(execbuf.batch_start_offset <= batch_size - 64);
+ if (reverse)
+ execbuf.batch_start_offset = batch_size - execbuf.batch_start_offset - 64;
+ igt_assert(execbuf.batch_start_offset <= batch_size - 64);
+ k = execbuf.batch_start_offset / 4;
+
+ base[k] = COPY_BLT_CMD | BLT_WRITE_ARGB;
+ if (has_64bit_reloc)
+ base[k] += 2;
+ k++;
+ base[k++] = 0xcc << 16 | 1 << 25 | 1 << 24 | 4096;
+ base[k++] = (y << 16) | x;
+ base[k++] = ((y+1) << 16) | (x + (CHUNK_SIZE >> 2));
+ base[k++] = dst_offset;
+ if (has_64bit_reloc)
+ base[k++] = dst_offset >> 32;
+ base[k++] = (y << 16) | x;
+ base[k++] = 4096;
+ base[k++] = src_offset;
+ if (has_64bit_reloc)
+ base[k++] = src_offset >> 32;
+ base[k++] = MI_BATCH_BUFFER_END;
+
+ igt_assert(exec[DST].flags & EXEC_OBJECT_WRITE);
+ gem_execbuf(fd, &execbuf);
+ igt_assert_eq_u64(__src_offset, src_offset);
+ igt_assert_eq_u64(__dst_offset, dst_offset);
+ }
+
+ gem_set_domain(fd, dst, I915_GEM_DOMAIN_CPU, 0);
+ for (offset = 0; offset < OBJECT_SIZE/4; offset++)
+ igt_assert_eq(pass*OBJECT_SIZE/4 + offset, d[offset]);
+ }
+
+ munmap(base, OBJECT_SIZE / CHUNK_SIZE * 128);
+ gem_close(fd, exec[BATCH].handle);
+
+ munmap(s, OBJECT_SIZE);
+ gem_close(fd, src);
+ munmap(d, OBJECT_SIZE);
+ gem_close(fd, dst);
+}
+
+igt_main
+{
+ int fd, sync;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ }
+
+ for (sync = 2; sync--; ) {
+ igt_subtest_f("cpu%s", sync ? "-sync":"")
+ test_streaming(fd, 0, sync);
+ igt_subtest_f("gtt%s", sync ? "-sync":"")
+ test_streaming(fd, 1, sync);
+ igt_subtest_f("wc%s", sync ? "-sync":"")
+ test_streaming(fd, 2, sync);
+ }
+
+ igt_subtest("batch-cpu")
+ test_batch(fd, 0, 0);
+ igt_subtest("batch-gtt")
+ test_batch(fd, 1, 0);
+ igt_subtest("batch-wc")
+ test_batch(fd, 2, 0);
+ igt_subtest("batch-reverse-cpu")
+ test_batch(fd, 0, 1);
+ igt_subtest("batch-reverse-gtt")
+ test_batch(fd, 1, 1);
+ igt_subtest("batch-reverse-wc")
+ test_batch(fd, 2, 1);
+
+ igt_fixture
+ close(fd);
+}
diff --git a/tests/i915/gem_stress.c b/tests/i915/gem_stress.c
new file mode 100644
index 00000000..225f283e
--- /dev/null
+++ b/tests/i915/gem_stress.c
@@ -0,0 +1,914 @@
+/*
+ * Copyright © 2011 Daniel Vetter
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ * Partially based upon gem_tiled_fence_blits.c
+ */
+
+/** @file gem_stress.c
+ *
+ * This is a general gem coherency test. It's designed to eventually replicate
+ * any possible sequence of access patterns. It works by copying a set of tiles
+ * between two sets of backing buffer objects, randomly permuting the assigned
+ * position on each copy operation.
+ *
+ * The copy operations are done in tiny portions (to reduce any race windows
+ * for corruptions, hence increasing the chances for observing one) and are
+ * constantly switched between all means to copy stuff (fenced blitter, unfenced
+ * render, mmap, pwrite/read).
+ *
+ * After every complete move of a set, the tiling parameters of the buffers
+ * are randomly changed to simulate the effects of libdrm caching.
+ *
+ * Buffers are 1MB so that they fit nicely into fences on gen2/3. A few are further
+ * split up to test relaxed fencing. Using this to push the average working set
+ * size over the available gtt space forces objects to be mapped as unfenceable
+ * (and as a side-effect tests gtt map/unmap coherency).
+ *
+ * In short: designed for maximum evilness.
+ */
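+
+/*
+ * Rough structure of a round (see main() below): pick a fresh random
+ * permutation of all tiles, re-randomize tiling and stride of the target
+ * buffer set (init_set()), copy every tile to its new position using a
+ * randomly chosen copy method (copy_tiles()/next_copyfunc()), and fold the
+ * permutation into tile_permutation so that fan_in_and_check() can verify
+ * the expected contents periodically and at the end.
+ */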
+
+#include "igt.h"
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("General gem coherency test.");
+
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+
+#define DUCTAPE 0xdead0001
+#define TILESZ 0xdead0002
+#define CHCK_RENDER 0xdead0003
+
+/** TODO:
+ * - beat on relaxed fencing (i.e. mappable/fenceable tracking in the kernel)
+ * - render copy (to check fence tracking and cache coherency management by the
+ * kernel)
+ * - multi-threading: probably just a wrapper script to launch multiple
+ * instances + an option to accordingly reduce the working set
+ * - gen6 inter-ring coherency (needs render copy, first)
+ * - variable buffer size
+ * - add an option to fork a second process that randomly sends signals to the
+ * first one (to check consistency of the kernel recovery paths)
+ */
+
+drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+int drm_fd;
+int devid;
+int num_fences;
+
+drm_intel_bo *busy_bo;
+
+struct option_struct {
+ unsigned scratch_buf_size;
+ unsigned max_dimension;
+ unsigned num_buffers;
+ int trace_tile;
+ int no_hw;
+ int gpu_busy_load;
+ int use_render;
+ int use_blt;
+ int forced_tiling;
+ int use_cpu_maps;
+ int total_rounds;
+ int fail;
+ int tiles_per_buf;
+ int ducttape;
+ int tile_size;
+ int check_render_cpyfn;
+ int use_signal_helper;
+};
+
+struct option_struct options;
+
+#define MAX_BUFS 4096
+#define SCRATCH_BUF_SIZE 1024*1024
+#define BUSY_BUF_SIZE (256*4096)
+#define TILE_BYTES(size) ((size)*(size)*sizeof(uint32_t))
+
+static struct igt_buf buffers[2][MAX_BUFS];
+/* logical tile i is currently stored at position tile_permutation[i] */
+static unsigned *tile_permutation;
+static unsigned num_buffers = 0;
+static unsigned current_set = 0;
+static unsigned target_set = 0;
+static unsigned num_total_tiles = 0;
+
+int fence_storm = 0;
+static int gpu_busy_load = 10;
+
+struct {
+ unsigned num_failed;
+ unsigned max_failed_reads;
+} stats;
+
+static void tile2xy(struct igt_buf *buf, unsigned tile, unsigned *x, unsigned *y)
+{
+ igt_assert(tile < buf->num_tiles);
+ *x = (tile*options.tile_size) % (buf->stride/sizeof(uint32_t));
+ *y = ((tile*options.tile_size) / (buf->stride/sizeof(uint32_t))) * options.tile_size;
+}
+
+static void emit_blt(drm_intel_bo *src_bo, uint32_t src_tiling, unsigned src_pitch,
+ unsigned src_x, unsigned src_y, unsigned w, unsigned h,
+ drm_intel_bo *dst_bo, uint32_t dst_tiling, unsigned dst_pitch,
+ unsigned dst_x, unsigned dst_y)
+{
+ uint32_t cmd_bits = 0;
+
+ if (IS_965(devid) && src_tiling) {
+ src_pitch /= 4;
+ cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
+ }
+
+ if (IS_965(devid) && dst_tiling) {
+ dst_pitch /= 4;
+ cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
+ }
+
+ /* copy lower half to upper half */
+ BLIT_COPY_BATCH_START(cmd_bits);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ dst_pitch);
+ OUT_BATCH(dst_y << 16 | dst_x);
+ OUT_BATCH((dst_y+h) << 16 | (dst_x+w));
+ OUT_RELOC_FENCED(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(src_y << 16 | src_x);
+ OUT_BATCH(src_pitch);
+ OUT_RELOC_FENCED(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+
+ if (batch->gen >= 6) {
+ BEGIN_BATCH(3, 0);
+ OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
+}
+
+/* All this gem thrashing wastes too much cpu time, so give the gpu something
+ * to do to increase the chances for races. */
+static void keep_gpu_busy(void)
+{
+ int tmp;
+
+ tmp = 1 << gpu_busy_load;
+ igt_assert_lte(tmp, 1024);
+
+ emit_blt(busy_bo, 0, 4096, 0, 0, tmp, 128,
+ busy_bo, 0, 4096, 0, 128);
+}
+
+static void set_to_cpu_domain(struct igt_buf *buf, int writing)
+{
+ gem_set_domain(drm_fd, buf->bo->handle, I915_GEM_DOMAIN_CPU,
+ writing ? I915_GEM_DOMAIN_CPU : 0);
+}
+
+static unsigned int copyfunc_seq = 0;
+static void (*copyfunc)(struct igt_buf *src, unsigned src_x, unsigned src_y,
+ struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
+ unsigned logical_tile_no);
+
+/* stride, x, y in units of uint32_t! */
+static void cpucpy2d(uint32_t *src, unsigned src_stride, unsigned src_x, unsigned src_y,
+ uint32_t *dst, unsigned dst_stride, unsigned dst_x, unsigned dst_y,
+ unsigned logical_tile_no)
+{
+ int i, j;
+ int failed = 0;
+
+ for (i = 0; i < options.tile_size; i++) {
+ for (j = 0; j < options.tile_size; j++) {
+ unsigned dst_ofs = dst_x + j + dst_stride * (dst_y + i);
+ unsigned src_ofs = src_x + j + src_stride * (src_y + i);
+ unsigned expect = logical_tile_no*options.tile_size*options.tile_size
+ + i*options.tile_size + j;
+ uint32_t tmp = src[src_ofs];
+ if (tmp != expect) {
+ igt_info("mismatch at tile %i pos %i, read %i, expected %i, diff %i\n", logical_tile_no, i * options.tile_size + j, tmp, expect, (int)tmp - expect);
+ igt_fail_on(options.trace_tile >= 0 && options.fail);
+ failed++;
+ }
+ /* when not aborting, correct any errors */
+ dst[dst_ofs] = expect;
+ }
+ }
+ igt_fail_on(failed && options.fail);
+
+ if (failed > stats.max_failed_reads)
+ stats.max_failed_reads = failed;
+ if (failed)
+ stats.num_failed++;
+}
+
+static void cpu_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
+ struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
+ unsigned logical_tile_no)
+{
+ igt_assert(batch->ptr == batch->buffer);
+
+ if (options.ducttape)
+ drm_intel_bo_wait_rendering(dst->bo);
+
+ if (options.use_cpu_maps) {
+ set_to_cpu_domain(src, 0);
+ set_to_cpu_domain(dst, 1);
+ }
+
+ cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
+ dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
+ logical_tile_no);
+}
+
+static void prw_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
+ struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
+ unsigned logical_tile_no)
+{
+ uint32_t tmp_tile[options.tile_size*options.tile_size];
+ int i;
+
+ igt_assert(batch->ptr == batch->buffer);
+
+ if (options.ducttape)
+ drm_intel_bo_wait_rendering(dst->bo);
+
+ if (src->tiling == I915_TILING_NONE) {
+ for (i = 0; i < options.tile_size; i++) {
+ unsigned ofs = src_x*sizeof(uint32_t) + src->stride*(src_y + i);
+ drm_intel_bo_get_subdata(src->bo, ofs,
+ options.tile_size*sizeof(uint32_t),
+ tmp_tile + options.tile_size*i);
+ }
+ } else {
+ if (options.use_cpu_maps)
+ set_to_cpu_domain(src, 0);
+
+ cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
+ tmp_tile, options.tile_size, 0, 0, logical_tile_no);
+ }
+
+ if (dst->tiling == I915_TILING_NONE) {
+ for (i = 0; i < options.tile_size; i++) {
+ unsigned ofs = dst_x*sizeof(uint32_t) + dst->stride*(dst_y + i);
+ drm_intel_bo_subdata(dst->bo, ofs,
+ options.tile_size*sizeof(uint32_t),
+ tmp_tile + options.tile_size*i);
+ }
+ } else {
+ if (options.use_cpu_maps)
+ set_to_cpu_domain(dst, 1);
+
+ cpucpy2d(tmp_tile, options.tile_size, 0, 0,
+ dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
+ logical_tile_no);
+ }
+}
+
+static void blitter_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
+ struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
+ unsigned logical_tile_no)
+{
+ static unsigned keep_gpu_busy_counter = 0;
+
+ /* check both edges of the fence usage */
+ if (keep_gpu_busy_counter & 1 && !fence_storm)
+ keep_gpu_busy();
+
+ emit_blt(src->bo, src->tiling, src->stride, src_x, src_y,
+ options.tile_size, options.tile_size,
+ dst->bo, dst->tiling, dst->stride, dst_x, dst_y);
+
+ if (!(keep_gpu_busy_counter & 1) && !fence_storm)
+ keep_gpu_busy();
+
+ keep_gpu_busy_counter++;
+
+ if (src->tiling)
+ fence_storm--;
+ if (dst->tiling)
+ fence_storm--;
+
+ if (fence_storm <= 1) {
+ fence_storm = 0;
+ intel_batchbuffer_flush(batch);
+ }
+}
+
+static void render_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
+ struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
+ unsigned logical_tile_no)
+{
+ static unsigned keep_gpu_busy_counter = 0;
+ igt_render_copyfunc_t rendercopy = igt_get_render_copyfunc(devid);
+
+ /* check both edges of the fence usage */
+ if (keep_gpu_busy_counter & 1)
+ keep_gpu_busy();
+
+ if (rendercopy) {
+ /*
+ * Flush outstanding blts so that they don't end up on
+ * the render ring when that's not allowed (gen6+).
+ */
+ intel_batchbuffer_flush(batch);
+ rendercopy(batch, NULL, src, src_x, src_y,
+ options.tile_size, options.tile_size,
+ dst, dst_x, dst_y);
+ } else
+ blitter_copyfunc(src, src_x, src_y,
+ dst, dst_x, dst_y,
+ logical_tile_no);
+ if (!(keep_gpu_busy_counter & 1))
+ keep_gpu_busy();
+
+ keep_gpu_busy_counter++;
+ intel_batchbuffer_flush(batch);
+}
+
+static void next_copyfunc(int tile)
+{
+ if (fence_storm) {
+ if (tile == options.trace_tile)
+ igt_info(" using fence storm\n");
+ return;
+ }
+
+ if (copyfunc_seq % 61 == 0
+ && options.forced_tiling != I915_TILING_NONE) {
+ if (tile == options.trace_tile)
+ igt_info(" using fence storm\n");
+ fence_storm = num_fences;
+ copyfunc = blitter_copyfunc;
+ } else if (copyfunc_seq % 17 == 0) {
+ if (tile == options.trace_tile)
+ igt_info(" using cpu\n");
+ copyfunc = cpu_copyfunc;
+ } else if (copyfunc_seq % 19 == 0) {
+ if (tile == options.trace_tile)
+ igt_info(" using prw\n");
+ copyfunc = prw_copyfunc;
+ } else if (copyfunc_seq % 3 == 0 && options.use_render) {
+ if (tile == options.trace_tile)
+ igt_info(" using render\n");
+ copyfunc = render_copyfunc;
+	} else if (options.use_blt) {
+ if (tile == options.trace_tile)
+ igt_info(" using blitter\n");
+ copyfunc = blitter_copyfunc;
+	} else if (options.use_render) {
+ if (tile == options.trace_tile)
+ igt_info(" using render\n");
+ copyfunc = render_copyfunc;
+ } else {
+ copyfunc = cpu_copyfunc;
+ }
+
+ copyfunc_seq++;
+}
+
+static void fan_out(void)
+{
+ uint32_t tmp_tile[options.tile_size*options.tile_size];
+ uint32_t seq = 0;
+ int i, k;
+ unsigned tile, buf_idx, x, y;
+
+ for (i = 0; i < num_total_tiles; i++) {
+ tile = i;
+ buf_idx = tile / options.tiles_per_buf;
+ tile %= options.tiles_per_buf;
+
+ tile2xy(&buffers[current_set][buf_idx], tile, &x, &y);
+
+ for (k = 0; k < options.tile_size*options.tile_size; k++)
+ tmp_tile[k] = seq++;
+
+ if (options.use_cpu_maps)
+ set_to_cpu_domain(&buffers[current_set][buf_idx], 1);
+
+ cpucpy2d(tmp_tile, options.tile_size, 0, 0,
+ buffers[current_set][buf_idx].data,
+ buffers[current_set][buf_idx].stride / sizeof(uint32_t),
+ x, y, i);
+ }
+
+ for (i = 0; i < num_total_tiles; i++)
+ tile_permutation[i] = i;
+}
+
+static void fan_in_and_check(void)
+{
+ uint32_t tmp_tile[options.tile_size*options.tile_size];
+ unsigned tile, buf_idx, x, y;
+ int i;
+ for (i = 0; i < num_total_tiles; i++) {
+ tile = tile_permutation[i];
+ buf_idx = tile / options.tiles_per_buf;
+ tile %= options.tiles_per_buf;
+
+ tile2xy(&buffers[current_set][buf_idx], tile, &x, &y);
+
+ if (options.use_cpu_maps)
+ set_to_cpu_domain(&buffers[current_set][buf_idx], 0);
+
+ cpucpy2d(buffers[current_set][buf_idx].data,
+ buffers[current_set][buf_idx].stride / sizeof(uint32_t),
+ x, y,
+ tmp_tile, options.tile_size, 0, 0,
+ i);
+ }
+}
+
+static void sanitize_stride(struct igt_buf *buf)
+{
+
+ if (igt_buf_height(buf) > options.max_dimension)
+ buf->stride = buf->size / options.max_dimension;
+
+ if (igt_buf_height(buf) < options.tile_size)
+ buf->stride = buf->size / options.tile_size;
+
+ if (igt_buf_width(buf) < options.tile_size)
+ buf->stride = options.tile_size * sizeof(uint32_t);
+
+ igt_assert(buf->stride <= 8192);
+ igt_assert(igt_buf_width(buf) <= options.max_dimension);
+ igt_assert(igt_buf_height(buf) <= options.max_dimension);
+
+ igt_assert(igt_buf_width(buf) >= options.tile_size);
+ igt_assert(igt_buf_height(buf) >= options.tile_size);
+
+}
+
+static void init_buffer(struct igt_buf *buf, unsigned size)
+{
+ memset(buf, 0, sizeof(*buf));
+
+ buf->bo = drm_intel_bo_alloc(bufmgr, "tiled bo", size, 4096);
+ buf->size = size;
+ igt_assert(buf->bo);
+ buf->tiling = I915_TILING_NONE;
+ buf->stride = 4096;
+
+ sanitize_stride(buf);
+
+ if (options.no_hw)
+ buf->data = malloc(size);
+ else {
+ if (options.use_cpu_maps)
+ drm_intel_bo_map(buf->bo, 1);
+ else
+ drm_intel_gem_bo_map_gtt(buf->bo);
+ buf->data = buf->bo->virtual;
+ }
+
+ buf->num_tiles = options.tiles_per_buf;
+}
+
+static void exchange_buf(void *array, unsigned i, unsigned j)
+{
+ struct igt_buf *buf_arr, tmp;
+ buf_arr = array;
+
+ memcpy(&tmp, &buf_arr[i], sizeof(struct igt_buf));
+ memcpy(&buf_arr[i], &buf_arr[j], sizeof(struct igt_buf));
+ memcpy(&buf_arr[j], &tmp, sizeof(struct igt_buf));
+}
+
+
+static void init_set(unsigned set)
+{
+ long int r;
+ int i;
+
+ igt_permute_array(buffers[set], num_buffers, exchange_buf);
+
+ if (current_set == 1 && options.gpu_busy_load == 0) {
+ gpu_busy_load++;
+ if (gpu_busy_load > 10)
+ gpu_busy_load = 6;
+ }
+
+ for (i = 0; i < num_buffers; i++) {
+ r = random();
+ if ((r & 3) != 0)
+ continue;
+ r >>= 2;
+
+ if ((r & 3) != 0)
+ buffers[set][i].tiling = I915_TILING_X;
+ else
+ buffers[set][i].tiling = I915_TILING_NONE;
+ r >>= 2;
+ if (options.forced_tiling >= 0)
+ buffers[set][i].tiling = options.forced_tiling;
+
+ if (buffers[set][i].tiling == I915_TILING_NONE) {
+ /* min 64 byte stride */
+ r %= 8;
+ buffers[set][i].stride = 64 * (1 << r);
+ } else if (IS_GEN2(devid)) {
+ /* min 128 byte stride */
+ r %= 7;
+ buffers[set][i].stride = 128 * (1 << r);
+ } else {
+ /* min 512 byte stride */
+ r %= 5;
+ buffers[set][i].stride = 512 * (1 << r);
+ }
+
+ sanitize_stride(&buffers[set][i]);
+
+ gem_set_tiling(drm_fd, buffers[set][i].bo->handle,
+ buffers[set][i].tiling,
+ buffers[set][i].stride);
+
+ if (options.trace_tile != -1 && i == options.trace_tile/options.tiles_per_buf)
+ igt_info("changing buffer %i containing tile %i: tiling %i, stride %i\n", i, options.trace_tile, buffers[set][i].tiling, buffers[set][i].stride);
+ }
+}
+
+static void exchange_uint(void *array, unsigned i, unsigned j)
+{
+ unsigned *i_arr = array;
+
+ igt_swap(i_arr[i], i_arr[j]);
+}
+
+static void copy_tiles(unsigned *permutation)
+{
+ unsigned src_tile, src_buf_idx, src_x, src_y;
+ unsigned dst_tile, dst_buf_idx, dst_x, dst_y;
+ struct igt_buf *src_buf, *dst_buf;
+ int i, idx;
+ for (i = 0; i < num_total_tiles; i++) {
+ /* tile_permutation is independent of current_permutation, so
+ * abuse it to randomize the order of the src bos */
+ idx = tile_permutation[i];
+ src_buf_idx = idx / options.tiles_per_buf;
+ src_tile = idx % options.tiles_per_buf;
+ src_buf = &buffers[current_set][src_buf_idx];
+
+ tile2xy(src_buf, src_tile, &src_x, &src_y);
+
+ dst_buf_idx = permutation[idx] / options.tiles_per_buf;
+ dst_tile = permutation[idx] % options.tiles_per_buf;
+ dst_buf = &buffers[target_set][dst_buf_idx];
+
+ tile2xy(dst_buf, dst_tile, &dst_x, &dst_y);
+
+ if (options.trace_tile == i)
+ igt_info("copying tile %i from %i (%i, %i) to %i (%i, %i)", i, tile_permutation[i], src_buf_idx, src_tile, permutation[idx], dst_buf_idx, dst_tile);
+
+ if (options.no_hw) {
+ cpucpy2d(src_buf->data,
+ src_buf->stride / sizeof(uint32_t),
+ src_x, src_y,
+ dst_buf->data,
+ dst_buf->stride / sizeof(uint32_t),
+ dst_x, dst_y,
+ i);
+ } else {
+ next_copyfunc(i);
+
+ copyfunc(src_buf, src_x, src_y, dst_buf, dst_x, dst_y,
+ i);
+ }
+ }
+
+ intel_batchbuffer_flush(batch);
+}
+
+static void sanitize_tiles_per_buf(void)
+{
+ if (options.tiles_per_buf > options.scratch_buf_size / TILE_BYTES(options.tile_size))
+ options.tiles_per_buf = options.scratch_buf_size / TILE_BYTES(options.tile_size);
+}
+
+static int parse_options(int opt, int opt_index, void *data)
+{
+ int tmp;
+
+ switch(opt) {
+ case 'd':
+ options.no_hw = 1;
+ igt_info("no-hw debug mode\n");
+ break;
+ case 'S':
+ options.use_signal_helper = 0;
+ igt_info("disabling that pesky nuisance who keeps interrupting us\n");
+ break;
+ case 's':
+ tmp = atoi(optarg);
+ if (tmp < options.tile_size*8192)
+ igt_info("scratch buffer size needs to be at least %i\n", options.tile_size * 8192);
+ else if (tmp & (tmp - 1)) {
+ igt_info("scratch buffer size needs to be a power-of-two\n");
+ } else {
+ igt_info("fixed scratch buffer size to %u\n", tmp);
+ options.scratch_buf_size = tmp;
+ sanitize_tiles_per_buf();
+ }
+ break;
+ case 'g':
+ tmp = atoi(optarg);
+ if (tmp < 0 || tmp > 10)
+			igt_info("gpu busy load needs to be between 0 and 10\n");
+ else {
+ igt_info("gpu busy load factor set to %i\n", tmp);
+ gpu_busy_load = options.gpu_busy_load = tmp;
+ }
+ break;
+ case 'c':
+ options.num_buffers = atoi(optarg);
+ igt_info("buffer count set to %i\n", options.num_buffers);
+ break;
+ case 't':
+ options.trace_tile = atoi(optarg);
+ igt_info("tracing tile %i\n", options.trace_tile);
+ break;
+ case 'r':
+ options.use_render = 0;
+ igt_info("disabling render copy\n");
+ break;
+ case 'b':
+ options.use_blt = 0;
+ igt_info("disabling blt copy\n");
+ break;
+ case 'u':
+ options.forced_tiling = I915_TILING_NONE;
+ igt_info("disabling tiling\n");
+ break;
+ case 'x':
+ if (options.use_cpu_maps) {
+ igt_info("tiling not possible with cpu maps\n");
+ } else {
+ options.forced_tiling = I915_TILING_X;
+ igt_info("using only X-tiling\n");
+ }
+ break;
+ case 'm':
+ options.use_cpu_maps = 1;
+ options.forced_tiling = I915_TILING_NONE;
+ igt_info("disabling tiling\n");
+ break;
+ case 'o':
+ options.total_rounds = atoi(optarg);
+ igt_info("total rounds %i\n", options.total_rounds);
+ break;
+ case 'f':
+ options.fail = 0;
+ igt_info("not failing when detecting errors\n");
+ break;
+ case 'p':
+ options.tiles_per_buf = atoi(optarg);
+ igt_info("tiles per buffer %i\n", options.tiles_per_buf);
+ break;
+ case DUCTAPE:
+ options.ducttape = 0;
+ igt_info("applying duct-tape\n");
+ break;
+ case TILESZ:
+ options.tile_size = atoi(optarg);
+ sanitize_tiles_per_buf();
+		igt_info("tile size %i\n", options.tile_size);
+ break;
+ case CHCK_RENDER:
+ options.check_render_cpyfn = 1;
+ igt_info("checking render copy function\n");
+ break;
+ }
+
+	/* actually 32767, according to docs, but that kills our nice power-of-two calculations. */
+ options.max_dimension = 16*1024;
+ if (options.use_render) {
+ if (IS_GEN2(devid) || IS_GEN3(devid))
+ options.max_dimension = 2048;
+ else
+ options.max_dimension = 8192;
+ }
+ igt_info("Limiting buffer to %dx%d\n", options.max_dimension, options.max_dimension);
+
+ return 0;
+}
+
+static void init(void)
+{
+ int i;
+ unsigned tmp;
+
+ if (options.num_buffers == 0) {
+ tmp = gem_aperture_size(drm_fd);
+ tmp = min(256 * (1024 * 1024), tmp);
+ num_buffers = 2 * tmp / options.scratch_buf_size / 3;
+ num_buffers /= 2;
+ igt_info("using %u buffers\n", num_buffers);
+ } else
+ num_buffers = options.num_buffers;
+
+ bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+ drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr);
+ num_fences = gem_available_fences(drm_fd);
+ igt_assert_lt(4, num_fences);
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+ busy_bo = drm_intel_bo_alloc(bufmgr, "tiled bo", BUSY_BUF_SIZE, 4096);
+ if (options.forced_tiling >= 0)
+ gem_set_tiling(drm_fd, busy_bo->handle, options.forced_tiling, 4096);
+
+ for (i = 0; i < num_buffers; i++) {
+ init_buffer(&buffers[0][i], options.scratch_buf_size);
+ init_buffer(&buffers[1][i], options.scratch_buf_size);
+
+ num_total_tiles += buffers[0][i].num_tiles;
+ }
+ current_set = 0;
+
+	/* just in case it helps reproducibility */
+ srandom(0xdeadbeef);
+}
+
+static void check_render_copyfunc(void)
+{
+ struct igt_buf src, dst;
+ uint32_t *ptr;
+ int i, j, pass;
+
+ if (!options.check_render_cpyfn)
+ return;
+
+ init_buffer(&src, options.scratch_buf_size);
+ init_buffer(&dst, options.scratch_buf_size);
+
+ for (pass = 0; pass < 16; pass++) {
+ int sx = random() % (igt_buf_width(&src)-options.tile_size);
+ int sy = random() % (igt_buf_height(&src)-options.tile_size);
+ int dx = random() % (igt_buf_width(&dst)-options.tile_size);
+ int dy = random() % (igt_buf_height(&dst)-options.tile_size);
+
+ if (options.use_cpu_maps)
+ set_to_cpu_domain(&src, 1);
+
+ memset(src.data, 0xff, options.scratch_buf_size);
+ for (j = 0; j < options.tile_size; j++) {
+ ptr = (uint32_t*)((char *)src.data + sx*4 + (sy+j) * src.stride);
+ for (i = 0; i < options.tile_size; i++)
+ ptr[i] = j * options.tile_size + i;
+ }
+
+ render_copyfunc(&src, sx, sy, &dst, dx, dy, 0);
+
+ if (options.use_cpu_maps)
+ set_to_cpu_domain(&dst, 0);
+
+ for (j = 0; j < options.tile_size; j++) {
+ ptr = (uint32_t*)((char *)dst.data + dx*4 + (dy+j) * dst.stride);
+ for (i = 0; i < options.tile_size; i++)
+ if (ptr[i] != j * options.tile_size + i) {
+ igt_info("render copyfunc mismatch at (%d, %d): found %d, expected %d\n", i, j, ptr[i], j * options.tile_size + i);
+ }
+ }
+ }
+}
+
+
+int main(int argc, char **argv)
+{
+ int i, j;
+ unsigned *current_permutation, *tmp_permutation;
+ static struct option long_options[] = {
+ {"no-hw", 0, 0, 'd'},
+ {"buf-size", 1, 0, 's'},
+ {"gpu-busy-load", 1, 0, 'g'},
+ {"no-signals", 0, 0, 'S'},
+ {"buffer-count", 1, 0, 'c'},
+ {"trace-tile", 1, 0, 't'},
+ {"disable-blt", 0, 0, 'b'},
+ {"disable-render", 0, 0, 'r'},
+ {"untiled", 0, 0, 'u'},
+ {"x-tiled", 0, 0, 'x'},
+ {"use-cpu-maps", 0, 0, 'm'},
+ {"rounds", 1, 0, 'o'},
+ {"no-fail", 0, 0, 'f'},
+ {"tiles-per-buf", 0, 0, 'p'},
+ {"remove-duct-tape", 0, 0, DUCTAPE},
+ {"tile-size", 1, 0, TILESZ},
+ {"check-render-cpyfn", 0, 0, CHCK_RENDER},
+ {NULL, 0, 0, 0},
+ };
+
+ options.scratch_buf_size = 256*4096;
+ options.no_hw = 0;
+ options.use_signal_helper = 1;
+ options.gpu_busy_load = 0;
+ options.num_buffers = 0;
+ options.trace_tile = -1;
+ options.use_render = 1;
+ options.use_blt = 1;
+ options.forced_tiling = -1;
+ options.use_cpu_maps = 0;
+ options.total_rounds = 512;
+ options.fail = 1;
+ options.ducttape = 1;
+ options.tile_size = 16;
+ options.tiles_per_buf = options.scratch_buf_size / TILE_BYTES(options.tile_size);
+ options.check_render_cpyfn = 0;
+
+	igt_simple_init_parse_opts(&argc, argv, "ds:g:c:t:rbuxmo:fp:",
+ long_options, NULL, parse_options, NULL);
+
+ drm_fd = drm_open_driver(DRIVER_INTEL);
+ devid = intel_get_drm_devid(drm_fd);
+
+	/* start our little helper early before too many allocations occur */
+ if (options.use_signal_helper)
+ igt_fork_signal_helper();
+
+ init();
+
+ check_render_copyfunc();
+
+ tile_permutation = malloc(num_total_tiles*sizeof(uint32_t));
+ current_permutation = malloc(num_total_tiles*sizeof(uint32_t));
+ tmp_permutation = malloc(num_total_tiles*sizeof(uint32_t));
+ igt_assert(tile_permutation);
+ igt_assert(current_permutation);
+ igt_assert(tmp_permutation);
+
+ fan_out();
+
+ for (i = 0; i < options.total_rounds; i++) {
+ igt_info("round %i\n", i);
+ if (i % 64 == 63) {
+ fan_in_and_check();
+ igt_info("everything correct after %i rounds\n", i + 1);
+ }
+
+ target_set = (current_set + 1) & 1;
+ init_set(target_set);
+
+ for (j = 0; j < num_total_tiles; j++)
+ current_permutation[j] = j;
+ igt_permute_array(current_permutation, num_total_tiles, exchange_uint);
+
+ copy_tiles(current_permutation);
+
+ memcpy(tmp_permutation, tile_permutation, sizeof(unsigned)*num_total_tiles);
+
+ /* accumulate the permutations */
+ for (j = 0; j < num_total_tiles; j++)
+ tile_permutation[j] = current_permutation[tmp_permutation[j]];
+
+ current_set = target_set;
+ }
+
+ fan_in_and_check();
+
+ igt_info("num failed tiles %u, max incoherent bytes %zd\n", stats.num_failed, stats.max_failed_reads * sizeof(uint32_t));
+
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(drm_fd);
+
+ igt_stop_signal_helper();
+
+ igt_exit();
+}
diff --git a/tests/i915/gem_sync.c b/tests/i915/gem_sync.c
new file mode 100644
index 00000000..fb209977
--- /dev/null
+++ b/tests/i915/gem_sync.c
@@ -0,0 +1,1304 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <time.h>
+#include <pthread.h>
+
+#include "igt.h"
+#include "igt_sysfs.h"
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define LOCAL_I915_EXEC_BSD_SHIFT (13)
+#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define MAX_PRIO LOCAL_I915_CONTEXT_MAX_USER_PRIORITY
+#define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY
+
+#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+IGT_TEST_DESCRIPTION("Basic check of ring<->ring write synchronisation.");
+
+/*
+ * Testcase: Basic check of sync
+ *
+ * Extremely efficient at catching missed irqs
+ */
+
+static double gettime(void)
+{
+ static clockid_t clock = -1;
+ struct timespec ts;
+
+ /* Stay on the same clock for consistency. */
+ if (clock != (clockid_t)-1) {
+ if (clock_gettime(clock, &ts))
+ goto error;
+ goto out;
+ }
+
+#ifdef CLOCK_MONOTONIC_RAW
+ if (!clock_gettime(clock = CLOCK_MONOTONIC_RAW, &ts))
+ goto out;
+#endif
+#ifdef CLOCK_MONOTONIC_COARSE
+ if (!clock_gettime(clock = CLOCK_MONOTONIC_COARSE, &ts))
+ goto out;
+#endif
+ if (!clock_gettime(clock = CLOCK_MONOTONIC, &ts))
+ goto out;
+error:
+ igt_warn("Could not read monotonic time: %s\n",
+ strerror(errno));
+ igt_assert(0);
+ return 0;
+
+out:
+ return ts.tv_sec + 1e-9*ts.tv_nsec;
+}
+
+static void
+sync_ring(int fd, unsigned ring, int num_children, int timeout)
+{
+ unsigned engines[16];
+ const char *names[16];
+ int num_engines = 0;
+
+ if (ring == ALL_ENGINES) {
+ for_each_physical_engine(fd, ring) {
+ names[num_engines] = e__->name;
+ engines[num_engines++] = ring;
+ if (num_engines == ARRAY_SIZE(engines))
+ break;
+ }
+
+ num_children *= num_engines;
+ } else {
+ gem_require_ring(fd, ring);
+ names[num_engines] = NULL;
+ engines[num_engines++] = ring;
+ }
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ igt_fork(child, num_children) {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 object;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ double start, elapsed;
+ unsigned long cycles;
+
+ memset(&object, 0, sizeof(object));
+ object.handle = gem_create(fd, 4096);
+ gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&object);
+ execbuf.buffer_count = 1;
+ execbuf.flags = engines[child % num_engines];
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, object.handle);
+
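+		/* Tight submit/wait loop on a single nop batch: every
+		 * iteration waits on a freshly queued request, which is
+		 * very effective at catching missed completion interrupts.
+		 */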
+ start = gettime();
+ cycles = 0;
+ do {
+ do {
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, object.handle);
+ } while (++cycles & 1023);
+ } while ((elapsed = gettime() - start) < timeout);
+ igt_info("%s%sompleted %ld cycles: %.3f us\n",
+ names[child % num_engines] ?: "",
+ names[child % num_engines] ? " c" : "C",
+ cycles, elapsed*1e6/cycles);
+
+ gem_close(fd, object.handle);
+ }
+ igt_waitchildren_timeout(timeout+10, NULL);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+static void
+idle_ring(int fd, unsigned ring, int timeout)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 object;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ double start, elapsed;
+ unsigned long cycles;
+
+ gem_require_ring(fd, ring);
+
+ memset(&object, 0, sizeof(object));
+ object.handle = gem_create(fd, 4096);
+ gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&object);
+ execbuf.buffer_count = 1;
+ execbuf.flags = ring;
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, object.handle);
+
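+	/* As sync_ring, but each iteration waits for the whole GPU to go
+	 * idle (gem_quiescent_gpu) instead of just the submitted batch.
+	 */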
+ intel_detect_and_clear_missed_interrupts(fd);
+ start = gettime();
+ cycles = 0;
+ do {
+ do {
+ gem_execbuf(fd, &execbuf);
+ gem_quiescent_gpu(fd);
+ } while (++cycles & 1023);
+ } while ((elapsed = gettime() - start) < timeout);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+
+ igt_info("Completed %ld cycles: %.3f us\n",
+ cycles, elapsed*1e6/cycles);
+
+ gem_close(fd, object.handle);
+}
+
+static void
+wakeup_ring(int fd, unsigned ring, int timeout, int wlen)
+{
+ unsigned engines[16];
+ const char *names[16];
+ int num_engines = 0;
+
+ if (ring == ALL_ENGINES) {
+ for_each_physical_engine(fd, ring) {
+ if (!gem_can_store_dword(fd, ring))
+ continue;
+
+ names[num_engines] = e__->name;
+ engines[num_engines++] = ring;
+ if (num_engines == ARRAY_SIZE(engines))
+ break;
+ }
+ igt_require(num_engines);
+ } else {
+ gem_require_ring(fd, ring);
+ igt_require(gem_can_store_dword(fd, ring));
+ names[num_engines] = NULL;
+ engines[num_engines++] = ring;
+ }
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ igt_fork(child, num_engines) {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 object;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ double end, this, elapsed, now, baseline;
+ unsigned long cycles;
+ uint32_t cmd;
+ igt_spin_t *spin;
+
+ memset(&object, 0, sizeof(object));
+ object.handle = gem_create(fd, 4096);
+ gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&object);
+ execbuf.buffer_count = 1;
+ execbuf.flags = engines[child % num_engines];
+
+ spin = __igt_spin_batch_new(fd,
+ .engine = execbuf.flags,
+ .flags = (IGT_SPIN_POLL_RUN |
+ IGT_SPIN_FAST));
+ igt_assert(spin->running);
+ cmd = *spin->batch;
+
+ gem_execbuf(fd, &execbuf);
+
+ igt_spin_batch_end(spin);
+ gem_sync(fd, object.handle);
+
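+		/* First estimate the bare spinner-end -> sync latency
+		 * (baseline), then measure it again with wlen extra nop
+		 * batches queued behind the spinner; the difference is the
+		 * added cost of waking up after a deeper queue.
+		 */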
+ for (int warmup = 0; warmup <= 1; warmup++) {
+ end = gettime() + timeout/10.;
+ elapsed = 0;
+ cycles = 0;
+ do {
+ *spin->batch = cmd;
+ *spin->running = 0;
+ gem_execbuf(fd, &spin->execbuf);
+ while (!READ_ONCE(*spin->running))
+ ;
+
+ this = gettime();
+ igt_spin_batch_end(spin);
+ gem_sync(fd, spin->handle);
+ now = gettime();
+
+ elapsed += now - this;
+ cycles++;
+ } while (now < end);
+ baseline = elapsed / cycles;
+ }
+ igt_info("%s%saseline %ld cycles: %.3f us\n",
+ names[child % num_engines] ?: "",
+ names[child % num_engines] ? " b" : "B",
+ cycles, elapsed*1e6/cycles);
+
+ end = gettime() + timeout;
+ elapsed = 0;
+ cycles = 0;
+ do {
+ *spin->batch = cmd;
+ *spin->running = 0;
+ gem_execbuf(fd, &spin->execbuf);
+ while (!READ_ONCE(*spin->running))
+ ;
+
+ for (int n = 0; n < wlen; n++)
+ gem_execbuf(fd, &execbuf);
+
+ this = gettime();
+ igt_spin_batch_end(spin);
+ gem_sync(fd, object.handle);
+ now = gettime();
+
+ elapsed += now - this;
+ cycles++;
+ } while (now < end);
+ elapsed -= cycles * baseline;
+
+ igt_info("%s%sompleted %ld cycles: %.3f + %.3f us\n",
+ names[child % num_engines] ?: "",
+ names[child % num_engines] ? " c" : "C",
+ cycles, 1e6*baseline, elapsed*1e6/cycles);
+
+ igt_spin_batch_free(fd, spin);
+ gem_close(fd, object.handle);
+ }
+ igt_waitchildren_timeout(2*timeout, NULL);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+static void active_ring(int fd, unsigned ring, int timeout)
+{
+ unsigned engines[16];
+ const char *names[16];
+ int num_engines = 0;
+
+ if (ring == ALL_ENGINES) {
+ for_each_physical_engine(fd, ring) {
+ if (!gem_can_store_dword(fd, ring))
+ continue;
+
+ names[num_engines] = e__->name;
+ engines[num_engines++] = ring;
+ if (num_engines == ARRAY_SIZE(engines))
+ break;
+ }
+ igt_require(num_engines);
+ } else {
+ gem_require_ring(fd, ring);
+ igt_require(gem_can_store_dword(fd, ring));
+ names[num_engines] = NULL;
+ engines[num_engines++] = ring;
+ }
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ igt_fork(child, num_engines) {
+ double start, end, elapsed;
+ unsigned long cycles;
+ igt_spin_t *spin[2];
+ uint32_t cmd;
+
+ spin[0] = __igt_spin_batch_new(fd,
+ .engine = ring,
+ .flags = IGT_SPIN_FAST);
+ cmd = *spin[0]->batch;
+
+ spin[1] = __igt_spin_batch_new(fd,
+ .engine = ring,
+ .flags = IGT_SPIN_FAST);
+ igt_assert(*spin[1]->batch == cmd);
+
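+		/* Ping-pong between the two spinners: end and wait on one
+		 * while the other keeps the engine busy, then rearm it, so
+		 * every sync happens against an active ring.
+		 */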
+ start = gettime();
+ end = start + timeout;
+ cycles = 0;
+ do {
+ for (int loop = 0; loop < 1024; loop++) {
+ igt_spin_t *s = spin[loop & 1];
+
+ igt_spin_batch_end(s);
+ gem_sync(fd, s->handle);
+
+ *s->batch = cmd;
+ gem_execbuf(fd, &s->execbuf);
+ }
+ cycles += 1024;
+ } while ((elapsed = gettime()) < end);
+ igt_spin_batch_free(fd, spin[1]);
+ igt_spin_batch_free(fd, spin[0]);
+
+ igt_info("%s%sompleted %ld cycles: %.3f us\n",
+ names[child % num_engines] ?: "",
+ names[child % num_engines] ? " c" : "C",
+ cycles, (elapsed - start)*1e6/cycles);
+ }
+ igt_waitchildren_timeout(2*timeout, NULL);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+static void
+active_wakeup_ring(int fd, unsigned ring, int timeout, int wlen)
+{
+ unsigned engines[16];
+ const char *names[16];
+ int num_engines = 0;
+
+ if (ring == ALL_ENGINES) {
+ for_each_physical_engine(fd, ring) {
+ if (!gem_can_store_dword(fd, ring))
+ continue;
+
+ names[num_engines] = e__->name;
+ engines[num_engines++] = ring;
+ if (num_engines == ARRAY_SIZE(engines))
+ break;
+ }
+ igt_require(num_engines);
+ } else {
+ gem_require_ring(fd, ring);
+ igt_require(gem_can_store_dword(fd, ring));
+ names[num_engines] = NULL;
+ engines[num_engines++] = ring;
+ }
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ igt_fork(child, num_engines) {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 object;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ double end, this, elapsed, now, baseline;
+ unsigned long cycles;
+ igt_spin_t *spin[2];
+ uint32_t cmd;
+
+ memset(&object, 0, sizeof(object));
+ object.handle = gem_create(fd, 4096);
+ gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&object);
+ execbuf.buffer_count = 1;
+ execbuf.flags = engines[child % num_engines];
+
+ spin[0] = __igt_spin_batch_new(fd,
+ .engine = execbuf.flags,
+ .flags = (IGT_SPIN_POLL_RUN |
+ IGT_SPIN_FAST));
+ igt_assert(spin[0]->running);
+ cmd = *spin[0]->batch;
+
+ spin[1] = __igt_spin_batch_new(fd,
+ .engine = execbuf.flags,
+ .flags = (IGT_SPIN_POLL_RUN |
+ IGT_SPIN_FAST));
+
+ gem_execbuf(fd, &execbuf);
+
+ igt_spin_batch_end(spin[1]);
+ igt_spin_batch_end(spin[0]);
+ gem_sync(fd, object.handle);
+
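+		/* Same baseline-vs-measured scheme as wakeup_ring, but a
+		 * second spinner is always queued behind the one being
+		 * ended so the engine never goes idle across the sync.
+		 */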
+ for (int warmup = 0; warmup <= 1; warmup++) {
+ *spin[0]->batch = cmd;
+ *spin[0]->running = 0;
+ gem_execbuf(fd, &spin[0]->execbuf);
+
+ end = gettime() + timeout/10.;
+ elapsed = 0;
+ cycles = 0;
+ do {
+ while (!READ_ONCE(*spin[0]->running))
+ ;
+
+ *spin[1]->batch = cmd;
+ *spin[1]->running = 0;
+ gem_execbuf(fd, &spin[1]->execbuf);
+
+ this = gettime();
+ igt_spin_batch_end(spin[0]);
+ gem_sync(fd, spin[0]->handle);
+ now = gettime();
+
+ elapsed += now - this;
+ cycles++;
+ igt_swap(spin[0], spin[1]);
+ } while (now < end);
+ igt_spin_batch_end(spin[0]);
+ baseline = elapsed / cycles;
+ }
+ igt_info("%s%saseline %ld cycles: %.3f us\n",
+ names[child % num_engines] ?: "",
+ names[child % num_engines] ? " b" : "B",
+ cycles, elapsed*1e6/cycles);
+
+ *spin[0]->batch = cmd;
+ *spin[0]->running = 0;
+ gem_execbuf(fd, &spin[0]->execbuf);
+
+ end = gettime() + timeout;
+ elapsed = 0;
+ cycles = 0;
+ do {
+ while (!READ_ONCE(*spin[0]->running))
+ ;
+
+ for (int n = 0; n < wlen; n++)
+ gem_execbuf(fd, &execbuf);
+
+ *spin[1]->batch = cmd;
+ *spin[1]->running = 0;
+ gem_execbuf(fd, &spin[1]->execbuf);
+
+ this = gettime();
+ igt_spin_batch_end(spin[0]);
+ gem_sync(fd, object.handle);
+ now = gettime();
+
+ elapsed += now - this;
+ cycles++;
+ igt_swap(spin[0], spin[1]);
+ } while (now < end);
+ igt_spin_batch_end(spin[0]);
+ elapsed -= cycles * baseline;
+
+ igt_info("%s%sompleted %ld cycles: %.3f + %.3f us\n",
+ names[child % num_engines] ?: "",
+ names[child % num_engines] ? " c" : "C",
+ cycles, 1e6*baseline, elapsed*1e6/cycles);
+
+ igt_spin_batch_free(fd, spin[1]);
+ igt_spin_batch_free(fd, spin[0]);
+ gem_close(fd, object.handle);
+ }
+ igt_waitchildren_timeout(2*timeout, NULL);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+static void
+store_ring(int fd, unsigned ring, int num_children, int timeout)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ unsigned engines[16];
+ const char *names[16];
+ int num_engines = 0;
+
+ if (ring == ALL_ENGINES) {
+ for_each_physical_engine(fd, ring) {
+ if (!gem_can_store_dword(fd, ring))
+ continue;
+
+ names[num_engines] = e__->name;
+ engines[num_engines++] = ring;
+ if (num_engines == ARRAY_SIZE(engines))
+ break;
+ }
+
+ num_children *= num_engines;
+ } else {
+ gem_require_ring(fd, ring);
+ igt_require(gem_can_store_dword(fd, ring));
+ names[num_engines] = NULL;
+ engines[num_engines++] = ring;
+ }
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ igt_fork(child, num_children) {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 object[2];
+ struct drm_i915_gem_relocation_entry reloc[1024];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ double start, elapsed;
+ unsigned long cycles;
+ uint32_t *batch, *b;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(object);
+ execbuf.flags = engines[child % num_engines];
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+ execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ memset(object, 0, sizeof(object));
+ object[0].handle = gem_create(fd, 4096);
+ gem_write(fd, object[0].handle, 0, &bbe, sizeof(bbe));
+ execbuf.buffer_count = 1;
+ gem_execbuf(fd, &execbuf);
+
+ object[0].flags |= EXEC_OBJECT_WRITE;
+ object[1].handle = gem_create(fd, 20*1024);
+
+ object[1].relocs_ptr = to_user_pointer(reloc);
+ object[1].relocation_count = 1024;
+
+ batch = gem_mmap__cpu(fd, object[1].handle, 0, 20*1024,
+ PROT_WRITE | PROT_READ);
+ gem_set_domain(fd, object[1].handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+
+ memset(reloc, 0, sizeof(reloc));
+ b = batch;
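+		/* Emit 1024 MI_STORE_DWORD_IMM commands; the address layout
+		 * depends on the gen: gen8+ takes a 64-bit address, gen4-7 a
+		 * padding dword plus a 32-bit address, and gen2/3 a
+		 * one-dword-shorter command. Pre-gen6 also sets the
+		 * global-GTT addressing bit (1 << 22), matching the secure
+		 * dispatch requested above.
+		 */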
+ for (int i = 0; i < 1024; i++) {
+ uint64_t offset;
+
+ reloc[i].presumed_offset = object[0].offset;
+ reloc[i].offset = (b - batch + 1) * sizeof(*batch);
+ reloc[i].delta = i * sizeof(uint32_t);
+ reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+
+ offset = object[0].offset + reloc[i].delta;
+ *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ *b++ = offset;
+ *b++ = offset >> 32;
+ } else if (gen >= 4) {
+ *b++ = 0;
+ *b++ = offset;
+ reloc[i].offset += sizeof(*batch);
+ } else {
+ b[-1] -= 1;
+ *b++ = offset;
+ }
+ *b++ = i;
+ }
+ *b++ = MI_BATCH_BUFFER_END;
+ igt_assert((b - batch)*sizeof(uint32_t) < 20*1024);
+ munmap(batch, 20*1024);
+ execbuf.buffer_count = 2;
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, object[1].handle);
+
+ start = gettime();
+ cycles = 0;
+ do {
+ do {
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, object[1].handle);
+ } while (++cycles & 1023);
+ } while ((elapsed = gettime() - start) < timeout);
+ igt_info("%s%sompleted %ld cycles: %.3f us\n",
+ names[child % num_engines] ?: "",
+ names[child % num_engines] ? " c" : "C",
+ cycles, elapsed*1e6/cycles);
+
+ gem_close(fd, object[1].handle);
+ gem_close(fd, object[0].handle);
+ }
+ igt_waitchildren_timeout(timeout+10, NULL);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+static void
+switch_ring(int fd, unsigned ring, int num_children, int timeout)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ unsigned engines[16];
+ const char *names[16];
+ int num_engines = 0;
+
+ gem_require_contexts(fd);
+
+ if (ring == ALL_ENGINES) {
+ for_each_physical_engine(fd, ring) {
+ if (!gem_can_store_dword(fd, ring))
+ continue;
+
+ names[num_engines] = e__->name;
+ engines[num_engines++] = ring;
+ if (num_engines == ARRAY_SIZE(engines))
+ break;
+ }
+
+ num_children *= num_engines;
+ } else {
+ gem_require_ring(fd, ring);
+ igt_require(gem_can_store_dword(fd, ring));
+ names[num_engines] = NULL;
+ engines[num_engines++] = ring;
+ }
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ igt_fork(child, num_children) {
+ struct context {
+ struct drm_i915_gem_exec_object2 object[2];
+ struct drm_i915_gem_relocation_entry reloc[1024];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ } contexts[2];
+ double start, elapsed;
+ unsigned long cycles;
+
+ for (int i = 0; i < ARRAY_SIZE(contexts); i++) {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ const uint32_t sz = 32 << 10;
+ struct context *c = &contexts[i];
+ uint32_t *batch, *b;
+
+ memset(&c->execbuf, 0, sizeof(c->execbuf));
+ c->execbuf.buffers_ptr = to_user_pointer(c->object);
+ c->execbuf.flags = engines[child % num_engines];
+ c->execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+ c->execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ if (gen < 6)
+ c->execbuf.flags |= I915_EXEC_SECURE;
+ c->execbuf.rsvd1 = gem_context_create(fd);
+
+ memset(c->object, 0, sizeof(c->object));
+ c->object[0].handle = gem_create(fd, 4096);
+ gem_write(fd, c->object[0].handle, 0, &bbe, sizeof(bbe));
+ c->execbuf.buffer_count = 1;
+ gem_execbuf(fd, &c->execbuf);
+
+ c->object[0].flags |= EXEC_OBJECT_WRITE;
+ c->object[1].handle = gem_create(fd, sz);
+
+ c->object[1].relocs_ptr = to_user_pointer(c->reloc);
+ c->object[1].relocation_count = 1024;
+
+ batch = gem_mmap__cpu(fd, c->object[1].handle, 0, sz,
+ PROT_WRITE | PROT_READ);
+ gem_set_domain(fd, c->object[1].handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+
+ memset(c->reloc, 0, sizeof(c->reloc));
+ b = batch;
+ for (int r = 0; r < 1024; r++) {
+ uint64_t offset;
+
+ c->reloc[r].presumed_offset = c->object[0].offset;
+ c->reloc[r].offset = (b - batch + 1) * sizeof(*batch);
+ c->reloc[r].delta = r * sizeof(uint32_t);
+ c->reloc[r].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ c->reloc[r].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+
+ offset = c->object[0].offset + c->reloc[r].delta;
+ *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ *b++ = offset;
+ *b++ = offset >> 32;
+ } else if (gen >= 4) {
+ *b++ = 0;
+ *b++ = offset;
+ c->reloc[r].offset += sizeof(*batch);
+ } else {
+ b[-1] -= 1;
+ *b++ = offset;
+ }
+ *b++ = r;
+ *b++ = 0x5 << 23;
+ }
+ *b++ = MI_BATCH_BUFFER_END;
+ igt_assert((b - batch)*sizeof(uint32_t) < sz);
+ munmap(batch, sz);
+ c->execbuf.buffer_count = 2;
+ gem_execbuf(fd, &c->execbuf);
+ gem_sync(fd, c->object[1].handle);
+ }
+
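+		/* Queue a batch on each context back to back, then time the
+		 * wait for the second one, i.e. a sync that also has to wait
+		 * for the preceding context's batch and the context switch.
+		 */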
+ cycles = 0;
+ elapsed = 0;
+ start = gettime();
+ do {
+ do {
+ double this;
+
+ gem_execbuf(fd, &contexts[0].execbuf);
+ gem_execbuf(fd, &contexts[1].execbuf);
+
+ this = gettime();
+ gem_sync(fd, contexts[1].object[1].handle);
+ elapsed += gettime() - this;
+
+ gem_sync(fd, contexts[0].object[1].handle);
+ } while (++cycles & 1023);
+ } while ((gettime() - start) < timeout);
+ igt_info("%s%sompleted %ld cycles: %.3f us\n",
+ names[child % num_engines] ?: "",
+ names[child % num_engines] ? " c" : "C",
+ cycles, elapsed*1e6/cycles);
+
+ for (int i = 0; i < ARRAY_SIZE(contexts); i++) {
+ gem_close(fd, contexts[i].object[1].handle);
+ gem_close(fd, contexts[i].object[0].handle);
+ gem_context_destroy(fd, contexts[i].execbuf.rsvd1);
+ }
+ }
+ igt_waitchildren_timeout(timeout+10, NULL);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+static void xchg(void *array, unsigned i, unsigned j)
+{
+ uint32_t *u32 = array;
+ uint32_t tmp = u32[i];
+ u32[i] = u32[j];
+ u32[j] = tmp;
+}
+
+struct waiter {
+ pthread_t thread;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ int ready;
+ volatile int *done;
+
+ int fd;
+ struct drm_i915_gem_exec_object2 object;
+ uint32_t handles[64];
+};
+
+static void *waiter(void *arg)
+{
+ struct waiter *w = arg;
+
+ do {
+ pthread_mutex_lock(&w->mutex);
+ w->ready = 0;
+ pthread_cond_signal(&w->cond);
+ while (!w->ready)
+ pthread_cond_wait(&w->cond, &w->mutex);
+ pthread_mutex_unlock(&w->mutex);
+ if (*w->done < 0)
+ return NULL;
+
+ gem_sync(w->fd, w->object.handle);
+ for (int n = 0; n < ARRAY_SIZE(w->handles); n++)
+ gem_sync(w->fd, w->handles[n]);
+ } while (1);
+}
+
+static void
+__store_many(int fd, unsigned ring, int timeout, unsigned long *cycles)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 object[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_relocation_entry reloc[1024];
+ struct waiter threads[64];
+ int order[64];
+ uint32_t *batch, *b;
+ int done;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(object);
+ execbuf.flags = ring;
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+ execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ memset(object, 0, sizeof(object));
+ object[0].handle = gem_create(fd, 4096);
+ gem_write(fd, object[0].handle, 0, &bbe, sizeof(bbe));
+ execbuf.buffer_count = 1;
+ gem_execbuf(fd, &execbuf);
+ object[0].flags |= EXEC_OBJECT_WRITE;
+
+ object[1].relocs_ptr = to_user_pointer(reloc);
+ object[1].relocation_count = 1024;
+ execbuf.buffer_count = 2;
+
+ memset(reloc, 0, sizeof(reloc));
+ b = batch = malloc(20*1024);
+ for (int i = 0; i < 1024; i++) {
+ uint64_t offset;
+
+ reloc[i].presumed_offset = object[0].offset;
+ reloc[i].offset = (b - batch + 1) * sizeof(*batch);
+ reloc[i].delta = i * sizeof(uint32_t);
+ reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+
+ offset = object[0].offset + reloc[i].delta;
+ *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ *b++ = offset;
+ *b++ = offset >> 32;
+ } else if (gen >= 4) {
+ *b++ = 0;
+ *b++ = offset;
+ reloc[i].offset += sizeof(*batch);
+ } else {
+ b[-1] -= 1;
+ *b++ = offset;
+ }
+ *b++ = i;
+ }
+ *b++ = MI_BATCH_BUFFER_END;
+ igt_assert((b - batch)*sizeof(uint32_t) < 20*1024);
+
+ done = 0;
+ for (int i = 0; i < ARRAY_SIZE(threads); i++) {
+ threads[i].fd = fd;
+ threads[i].object = object[1];
+ threads[i].object.handle = gem_create(fd, 20*1024);
+ gem_write(fd, threads[i].object.handle, 0, batch, 20*1024);
+
+ pthread_cond_init(&threads[i].cond, NULL);
+ pthread_mutex_init(&threads[i].mutex, NULL);
+ threads[i].done = &done;
+ threads[i].ready = 0;
+
+ pthread_create(&threads[i].thread, NULL, waiter, &threads[i]);
+ order[i] = i;
+ }
+ free(batch);
+
+ for (int i = 0; i < ARRAY_SIZE(threads); i++) {
+ for (int j = 0; j < ARRAY_SIZE(threads); j++)
+ threads[i].handles[j] = threads[j].object.handle;
+ }
+
+ igt_until_timeout(timeout) {
+ for (int i = 0; i < ARRAY_SIZE(threads); i++) {
+ pthread_mutex_lock(&threads[i].mutex);
+ while (threads[i].ready)
+ pthread_cond_wait(&threads[i].cond,
+ &threads[i].mutex);
+ pthread_mutex_unlock(&threads[i].mutex);
+ igt_permute_array(threads[i].handles,
+ ARRAY_SIZE(threads[i].handles),
+ xchg);
+ }
+
+ igt_permute_array(order, ARRAY_SIZE(threads), xchg);
+ for (int i = 0; i < ARRAY_SIZE(threads); i++) {
+ object[1] = threads[i].object;
+ gem_execbuf(fd, &execbuf);
+ threads[i].object = object[1];
+ }
+ ++*cycles;
+
+ for (int i = 0; i < ARRAY_SIZE(threads); i++) {
+ struct waiter *w = &threads[order[i]];
+
+ w->ready = 1;
+ pthread_cond_signal(&w->cond);
+ }
+ }
+
+ for (int i = 0; i < ARRAY_SIZE(threads); i++) {
+ pthread_mutex_lock(&threads[i].mutex);
+ while (threads[i].ready)
+ pthread_cond_wait(&threads[i].cond, &threads[i].mutex);
+ pthread_mutex_unlock(&threads[i].mutex);
+ }
+ done = -1;
+ for (int i = 0; i < ARRAY_SIZE(threads); i++) {
+ threads[i].ready = 1;
+ pthread_cond_signal(&threads[i].cond);
+ pthread_join(threads[i].thread, NULL);
+ gem_close(fd, threads[i].object.handle);
+ }
+
+ gem_close(fd, object[0].handle);
+}
+
+static void
+store_many(int fd, unsigned ring, int timeout)
+{
+ unsigned long *shared;
+ const char *names[16];
+ int n = 0;
+
+ shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(shared != MAP_FAILED);
+
+ intel_detect_and_clear_missed_interrupts(fd);
+
+ if (ring == ALL_ENGINES) {
+ for_each_physical_engine(fd, ring) {
+ if (!gem_can_store_dword(fd, ring))
+ continue;
+
+ igt_fork(child, 1)
+ __store_many(fd,
+ ring,
+ timeout,
+ &shared[n]);
+
+ names[n++] = e__->name;
+ }
+ igt_waitchildren();
+ } else {
+ gem_require_ring(fd, ring);
+ igt_require(gem_can_store_dword(fd, ring));
+ __store_many(fd, ring, timeout, &shared[n]);
+ names[n++] = NULL;
+ }
+
+ for (int i = 0; i < n; i++) {
+ igt_info("%s%sompleted %ld cycles\n",
+ names[i] ?: "", names[i] ? " c" : "C", shared[i]);
+ }
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+ munmap(shared, 4096);
+}
+
+static void
+sync_all(int fd, int num_children, int timeout)
+{
+ unsigned engines[16], engine;
+ int num_engines = 0;
+
+ for_each_physical_engine(fd, engine) {
+ engines[num_engines++] = engine;
+ if (num_engines == ARRAY_SIZE(engines))
+ break;
+ }
+ igt_require(num_engines);
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ igt_fork(child, num_children) {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 object;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ double start, elapsed;
+ unsigned long cycles;
+
+ memset(&object, 0, sizeof(object));
+ object.handle = gem_create(fd, 4096);
+ gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&object);
+ execbuf.buffer_count = 1;
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, object.handle);
+
+ start = gettime();
+ cycles = 0;
+ do {
+ do {
+ for (int n = 0; n < num_engines; n++) {
+ execbuf.flags = engines[n];
+ gem_execbuf(fd, &execbuf);
+ }
+ gem_sync(fd, object.handle);
+ } while (++cycles & 1023);
+ } while ((elapsed = gettime() - start) < timeout);
+ igt_info("Completed %ld cycles: %.3f us\n",
+ cycles, elapsed*1e6/cycles);
+
+ gem_close(fd, object.handle);
+ }
+ igt_waitchildren_timeout(timeout+10, NULL);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+static void
+store_all(int fd, int num_children, int timeout)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ unsigned engines[16];
+ int num_engines = 0;
+ unsigned int ring;
+
+ for_each_physical_engine(fd, ring) {
+ if (!gem_can_store_dword(fd, ring))
+ continue;
+
+ engines[num_engines++] = ring;
+ if (num_engines == ARRAY_SIZE(engines))
+ break;
+ }
+ igt_require(num_engines);
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ igt_fork(child, num_children) {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 object[2];
+ struct drm_i915_gem_relocation_entry reloc[1024];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ double start, elapsed;
+ unsigned long cycles;
+ uint32_t *batch, *b;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(object);
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+ execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ memset(object, 0, sizeof(object));
+ object[0].handle = gem_create(fd, 4096);
+ gem_write(fd, object[0].handle, 0, &bbe, sizeof(bbe));
+ execbuf.buffer_count = 1;
+ gem_execbuf(fd, &execbuf);
+
+ object[0].flags |= EXEC_OBJECT_WRITE;
+ object[1].handle = gem_create(fd, 1024*16 + 4096);
+
+ object[1].relocs_ptr = to_user_pointer(reloc);
+ object[1].relocation_count = 1024;
+
+ batch = gem_mmap__cpu(fd, object[1].handle, 0, 16*1024 + 4096,
+ PROT_WRITE | PROT_READ);
+ gem_set_domain(fd, object[1].handle,
+ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+
+ memset(reloc, 0, sizeof(reloc));
+ b = batch;
+ for (int i = 0; i < 1024; i++) {
+ uint64_t offset;
+
+ reloc[i].presumed_offset = object[0].offset;
+ reloc[i].offset = (b - batch + 1) * sizeof(*batch);
+ reloc[i].delta = i * sizeof(uint32_t);
+ reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+
+ offset = object[0].offset + reloc[i].delta;
+ *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ *b++ = offset;
+ *b++ = offset >> 32;
+ } else if (gen >= 4) {
+ *b++ = 0;
+ *b++ = offset;
+ reloc[i].offset += sizeof(*batch);
+ } else {
+ b[-1] -= 1;
+ *b++ = offset;
+ }
+ *b++ = i;
+ }
+ *b++ = MI_BATCH_BUFFER_END;
+ igt_assert((b - batch)*sizeof(uint32_t) < 20*1024);
+ munmap(batch, 16*1024+4096);
+ execbuf.buffer_count = 2;
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, object[1].handle);
+
+ start = gettime();
+ cycles = 0;
+ do {
+ do {
+ igt_permute_array(engines, num_engines, xchg);
+ for (int n = 0; n < num_engines; n++) {
+ execbuf.flags &= ~ENGINE_MASK;
+ execbuf.flags |= engines[n];
+ gem_execbuf(fd, &execbuf);
+ }
+ gem_sync(fd, object[1].handle);
+ } while (++cycles & 1023);
+ } while ((elapsed = gettime() - start) < timeout);
+ igt_info("Completed %ld cycles: %.3f us\n",
+ cycles, elapsed*1e6/cycles);
+
+ gem_close(fd, object[1].handle);
+ gem_close(fd, object[0].handle);
+ }
+ igt_waitchildren_timeout(timeout+10, NULL);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+static void
+preempt(int fd, unsigned ring, int num_children, int timeout)
+{
+ unsigned engines[16];
+ const char *names[16];
+ int num_engines = 0;
+ uint32_t ctx[2];
+
+ if (ring == ALL_ENGINES) {
+ for_each_physical_engine(fd, ring) {
+ names[num_engines] = e__->name;
+ engines[num_engines++] = ring;
+ if (num_engines == ARRAY_SIZE(engines))
+ break;
+ }
+
+ num_children *= num_engines;
+ } else {
+ gem_require_ring(fd, ring);
+ names[num_engines] = NULL;
+ engines[num_engines++] = ring;
+ }
+
+ ctx[0] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[0], MIN_PRIO);
+
+ ctx[1] = gem_context_create(fd);
+ gem_context_set_priority(fd, ctx[1], MAX_PRIO);
+
+ intel_detect_and_clear_missed_interrupts(fd);
+ igt_fork(child, num_children) {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ struct drm_i915_gem_exec_object2 object;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ double start, elapsed;
+ unsigned long cycles;
+
+ memset(&object, 0, sizeof(object));
+ object.handle = gem_create(fd, 4096);
+ gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&object);
+ execbuf.buffer_count = 1;
+ execbuf.flags = engines[child % num_engines];
+ execbuf.rsvd1 = ctx[1];
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, object.handle);
+
+ start = gettime();
+ cycles = 0;
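+ /*
+ * Keep a low-priority spinner busy on the target engine; every
+ * high-priority nop submitted below must preempt it before
+ * gem_sync() can complete.
+ */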
+ do {
+ igt_spin_t *spin =
+ __igt_spin_batch_new(fd,
+ .ctx = ctx[0],
+ .engine = execbuf.flags);
+
+ do {
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, object.handle);
+ } while (++cycles & 1023);
+
+ igt_spin_batch_free(fd, spin);
+ } while ((elapsed = gettime() - start) < timeout);
+ igt_info("%s%sompleted %ld cycles: %.3f us\n",
+ names[child % num_engines] ?: "",
+ names[child % num_engines] ? " c" : "C",
+ cycles, elapsed*1e6/cycles);
+
+ gem_close(fd, object.handle);
+ }
+ igt_waitchildren_timeout(timeout+10, NULL);
+ igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+
+ gem_context_destroy(fd, ctx[1]);
+ gem_context_destroy(fd, ctx[0]);
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ int fd = -1;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ gem_submission_print_method(fd);
+ gem_scheduler_print_capability(fd);
+
+ igt_fork_hang_detector(fd);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_f("%s", e->name)
+ sync_ring(fd, e->exec_id | e->flags, 1, 150);
+ igt_subtest_f("idle-%s", e->name)
+ idle_ring(fd, e->exec_id | e->flags, 150);
+ igt_subtest_f("active-%s", e->name)
+ active_ring(fd, e->exec_id | e->flags, 150);
+ igt_subtest_f("wakeup-%s", e->name)
+ wakeup_ring(fd, e->exec_id | e->flags, 150, 1);
+ igt_subtest_f("active-wakeup-%s", e->name)
+ active_wakeup_ring(fd, e->exec_id | e->flags, 150, 1);
+ igt_subtest_f("double-wakeup-%s", e->name)
+ wakeup_ring(fd, e->exec_id | e->flags, 150, 2);
+ igt_subtest_f("store-%s", e->name)
+ store_ring(fd, e->exec_id | e->flags, 1, 150);
+ igt_subtest_f("switch-%s", e->name)
+ switch_ring(fd, e->exec_id | e->flags, 1, 150);
+ igt_subtest_f("forked-switch-%s", e->name)
+ switch_ring(fd, e->exec_id | e->flags, ncpus, 150);
+ igt_subtest_f("many-%s", e->name)
+ store_many(fd, e->exec_id | e->flags, 150);
+ igt_subtest_f("forked-%s", e->name)
+ sync_ring(fd, e->exec_id | e->flags, ncpus, 150);
+ igt_subtest_f("forked-store-%s", e->name)
+ store_ring(fd, e->exec_id | e->flags, ncpus, 150);
+ }
+
+ igt_subtest("basic-each")
+ sync_ring(fd, ALL_ENGINES, 1, 5);
+ igt_subtest("basic-store-each")
+ store_ring(fd, ALL_ENGINES, 1, 5);
+ igt_subtest("basic-many-each")
+ store_many(fd, ALL_ENGINES, 5);
+ igt_subtest("switch-each")
+ switch_ring(fd, ALL_ENGINES, 1, 150);
+ igt_subtest("forked-switch-each")
+ switch_ring(fd, ALL_ENGINES, ncpus, 150);
+ igt_subtest("forked-each")
+ sync_ring(fd, ALL_ENGINES, ncpus, 150);
+ igt_subtest("forked-store-each")
+ store_ring(fd, ALL_ENGINES, ncpus, 150);
+ igt_subtest("active-each")
+ active_ring(fd, ALL_ENGINES, 150);
+ igt_subtest("wakeup-each")
+ wakeup_ring(fd, ALL_ENGINES, 150, 1);
+ igt_subtest("active-wakeup-each")
+ active_wakeup_ring(fd, ALL_ENGINES, 150, 1);
+ igt_subtest("double-wakeup-each")
+ wakeup_ring(fd, ALL_ENGINES, 150, 2);
+
+ igt_subtest("basic-all")
+ sync_all(fd, 1, 5);
+ igt_subtest("basic-store-all")
+ store_all(fd, 1, 5);
+
+ igt_subtest("all")
+ sync_all(fd, 1, 150);
+ igt_subtest("store-all")
+ store_all(fd, 1, 150);
+ igt_subtest("forked-all")
+ sync_all(fd, ncpus, 150);
+ igt_subtest("forked-store-all")
+ store_all(fd, ncpus, 150);
+
+ igt_subtest_group {
+ igt_fixture {
+ gem_require_contexts(fd);
+ igt_require(gem_scheduler_has_ctx_priority(fd));
+ igt_require(gem_scheduler_has_preemption(fd));
+ }
+
+ igt_subtest("preempt-all")
+ preempt(fd, ALL_ENGINES, 1, 20);
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_f("preempt-%s", e->name)
+ preempt(fd, e->exec_id | e->flags, ncpus, 150);
+ }
+ }
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_threaded_access_tiled.c b/tests/i915/gem_threaded_access_tiled.c
new file mode 100644
index 00000000..288324d4
--- /dev/null
+++ b/tests/i915/gem_threaded_access_tiled.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Mika Kuoppala <mika.kuoppala@intel.com>
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("Check parallel access to tiled memory.");
+
+/* Testcase: check parallel access to tiled memory
+ *
+ * Parallel access to tiled memory once caused a SIGBUS.
+ */
+
+#define NUM_THREADS 2
+#define WIDTH 4096
+#define HEIGHT 4096
+
+struct thread_ctx {
+ drm_intel_bo *bo;
+};
+
+static drm_intel_bufmgr *bufmgr;
+static struct thread_ctx tctx[NUM_THREADS];
+
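+/*
+ * Each thread copies the whole GTT mapping of the shared tiled bo into a
+ * private buffer; concurrent faulting reads on that mapping are what used
+ * to trigger the SIGBUS this test guards against.
+ */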
+static void *copy_fn(void *p)
+{
+ unsigned char *buf;
+ struct thread_ctx *c = p;
+
+ buf = malloc(WIDTH * HEIGHT);
+ if (buf == NULL)
+ return (void *)1;
+
+ memcpy(buf, c->bo->virtual, WIDTH * HEIGHT);
+
+ free(buf);
+ return (void *)0;
+}
+
+static int copy_tile_threaded(drm_intel_bo *bo)
+{
+ int i;
+ int r;
+ pthread_t thr[NUM_THREADS];
+ void *status;
+
+ for (i = 0; i < NUM_THREADS; i++) {
+ tctx[i].bo = bo;
+ r = pthread_create(&thr[i], NULL, copy_fn, (void *)&tctx[i]);
+ igt_assert_eq(r, 0);
+ }
+
+ for (i = 0; i < NUM_THREADS; i++) {
+ pthread_join(thr[i], &status);
+ igt_assert(status == 0);
+ }
+
+ return 0;
+}
+
+igt_simple_main
+{
+ int fd;
+ drm_intel_bo *bo;
+ uint32_t tiling_mode = I915_TILING_Y;
+ unsigned long pitch = 0;
+ int r;
+
+ igt_skip_on_simulation();
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_assert(fd >= 0);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ igt_assert(bufmgr);
+
+ bo = drm_intel_bo_alloc_tiled(bufmgr, "mmap bo", WIDTH, HEIGHT, 1,
+ &tiling_mode, &pitch, 0);
+ igt_assert(bo);
+
+ r = drm_intel_gem_bo_map_gtt(bo);
+ igt_assert(!r);
+
+ r = copy_tile_threaded(bo);
+ igt_assert(!r);
+
+ r = drm_intel_gem_bo_unmap_gtt(bo);
+ igt_assert(!r);
+
+ drm_intel_bo_unreference(bo);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+}
diff --git a/tests/i915/gem_tiled_blits.c b/tests/i915/gem_tiled_blits.c
new file mode 100644
index 00000000..51c1b584
--- /dev/null
+++ b/tests/i915/gem_tiled_blits.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_tiled_blits.c
+ *
+ * This is a test of doing many tiled blits, with a working set
+ * larger than the aperture size.
+ *
+ * The goal is to catch a couple of types of failure:
+ * - Fence management problems on pre-965.
+ * - A17 or L-shaped memory tiling workaround problems in acceleration.
+ *
+ * The model is to fill a collection of 1MB objects in a way that can't trip
+ * over A6 swizzling -- upload data to a non-tiled object, blit to the tiled
+ * object. Then, copy the 1MB objects randomly between each other for a while.
+ * Finally, download their data through linear objects again and see what
+ * resulted.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+
+IGT_TEST_DESCRIPTION("Test doing many tiled blits, with a working set larger"
+ " than the aperture size.");
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static int width = 512, height = 512;
+
+static drm_intel_bo *
+create_bo(uint32_t start_val)
+{
+ drm_intel_bo *bo, *linear_bo;
+ uint32_t *linear;
+ uint32_t tiling = I915_TILING_X;
+ int i;
+
+ bo = drm_intel_bo_alloc(bufmgr, "tiled bo", 1024 * 1024, 4096);
+ do_or_die(drm_intel_bo_set_tiling(bo, &tiling, width * 4));
+ igt_assert(tiling == I915_TILING_X);
+
+ linear_bo = drm_intel_bo_alloc(bufmgr, "linear src", 1024 * 1024, 4096);
+
+ /* Fill the BO with dwords starting at start_val */
+ do_or_die(drm_intel_bo_map(linear_bo, 1));
+ linear = linear_bo->virtual;
+ for (i = 0; i < 1024 * 1024 / 4; i++)
+ linear[i] = start_val++;
+ drm_intel_bo_unmap(linear_bo);
+
+ intel_copy_bo(batch, bo, linear_bo, width*height*4);
+
+ drm_intel_bo_unreference(linear_bo);
+
+ return bo;
+}
+
+static void
+check_bo(drm_intel_bo *bo, uint32_t val)
+{
+ drm_intel_bo *linear_bo;
+ uint32_t *linear;
+ int num_errors;
+ int i;
+
+ linear_bo = drm_intel_bo_alloc(bufmgr, "linear dst", 1024 * 1024, 4096);
+
+ intel_copy_bo(batch, linear_bo, bo, width*height*4);
+
+ do_or_die(drm_intel_bo_map(linear_bo, 0));
+ linear = linear_bo->virtual;
+
+ num_errors = 0;
+ for (i = 0; i < 1024 * 1024 / 4; i++) {
+ if (linear[i] != val && num_errors++ < 32)
+ igt_warn("[%08x] Expected 0x%08x, found 0x%08x (difference 0x%08x)\n",
+ i * 4, val, linear[i], val ^ linear[i]);
+ val++;
+ }
+ igt_assert_eq(num_errors, 0);
+ drm_intel_bo_unmap(linear_bo);
+
+ drm_intel_bo_unreference(linear_bo);
+}
+
+static void run_test(int count)
+{
+ drm_intel_bo **bo;
+ uint32_t *bo_start_val;
+ uint32_t start = 0;
+ int i;
+
+ igt_debug("Using %d 1MiB buffers\n", count);
+
+ bo = malloc(sizeof(drm_intel_bo *)*count);
+ bo_start_val = malloc(sizeof(uint32_t)*count);
+
+ for (i = 0; i < count; i++) {
+ bo[i] = create_bo(start);
+ bo_start_val[i] = start;
+ start += 1024 * 1024 / 4;
+ }
+ igt_info("Verifying initialisation...\n");
+ for (i = 0; i < count; i++)
+ check_bo(bo[i], bo_start_val[i]);
+
+ igt_info("Cyclic blits, forward...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = i % count;
+ int dst = (i+1) % count;
+
+ if (src == dst)
+ continue;
+
+ intel_copy_bo(batch, bo[dst], bo[src], width*height*4);
+ bo_start_val[dst] = bo_start_val[src];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(bo[i], bo_start_val[i]);
+
+ if (igt_run_in_simulation()) {
+ for (i = 0; i < count; i++)
+ drm_intel_bo_unreference(bo[i]);
+ free(bo_start_val);
+ free(bo);
+ return;
+ }
+
+ igt_info("Cyclic blits, backward...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = (i+1) % count;
+ int dst = i % count;
+
+ if (src == dst)
+ continue;
+
+ intel_copy_bo(batch, bo[dst], bo[src], width*height*4);
+ bo_start_val[dst] = bo_start_val[src];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(bo[i], bo_start_val[i]);
+
+ igt_info("Random blits...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = random() % count;
+ int dst = random() % count;
+
+ if (src == dst)
+ continue;
+
+ intel_copy_bo(batch, bo[dst], bo[src], width*height*4);
+ bo_start_val[dst] = bo_start_val[src];
+ }
+ for (i = 0; i < count; i++) {
+ check_bo(bo[i], bo_start_val[i]);
+ drm_intel_bo_unreference(bo[i]);
+ }
+
+ free(bo_start_val);
+ free(bo);
+}
+
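+/* Clamp the reported aperture to just below 4GiB when sizing the working set. */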
+#define MAX_32b ((1ull << 32) - 4096)
+
+int fd;
+
+int main(int argc, char **argv)
+{
+ igt_subtest_init(argc, argv);
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ drm_intel_bufmgr_gem_set_vma_cache_size(bufmgr, 32);
+ batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+ }
+
+ igt_subtest("basic")
+ run_test(2);
+
+ igt_subtest("normal") {
+ uint64_t count;
+
+ count = gem_aperture_size(fd);
+ if (count >> 32)
+ count = MAX_32b;
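+ /* Use 1.5x the (clamped) aperture in 1MiB buffers, forced to an odd count. */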
+ count = 3 * count / (1024*1024) / 2;
+ count += (count & 1) == 0;
+ intel_require_memory(count, 1024*1024, CHECK_RAM);
+
+ run_test(count);
+ }
+
+ igt_subtest("interruptible") {
+ uint64_t count;
+
+ count = gem_aperture_size(fd);
+ if (count >> 32)
+ count = MAX_32b;
+ count = 3 * count / (1024*1024) / 2;
+ count += (count & 1) == 0;
+ intel_require_memory(count, 1024*1024, CHECK_RAM);
+
+ igt_fork_signal_helper();
+ run_test(count);
+ igt_stop_signal_helper();
+ }
+
+ igt_fixture {
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+ }
+
+ igt_exit();
+}
diff --git a/tests/i915/gem_tiled_fence_blits.c b/tests/i915/gem_tiled_fence_blits.c
new file mode 100644
index 00000000..693e96ce
--- /dev/null
+++ b/tests/i915/gem_tiled_fence_blits.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright © 2009,2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_tiled_fence_blits.c
+ *
+ * This is a test of doing many tiled blits, with a working set
+ * larger than the aperture size.
+ *
+ * The goal is to catch a couple of types of failure:
+ * - Fence management problems on pre-965.
+ * - A17 or L-shaped memory tiling workaround problems in acceleration.
+ *
+ * The model is to fill a collection of 1MB objects in a way that can't trip
+ * over A6 swizzling -- upload data to a non-tiled object, blit to the tiled
+ * object. Then, copy the 1MB objects randomly between each other for a while.
+ * Finally, download their data through linear objects again and see what
+ * resulted.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "intel_bufmgr.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+enum {width=512, height=512};
+static const int bo_size = width * height * 4;
+static uint32_t linear[width * height];
+
+static drm_intel_bo *
+create_bo(int fd, uint32_t start_val)
+{
+ drm_intel_bo *bo;
+ uint32_t tiling = I915_TILING_X;
+ int ret, i;
+
+ bo = drm_intel_bo_alloc(bufmgr, "tiled bo", bo_size, 4096);
+ ret = drm_intel_bo_set_tiling(bo, &tiling, width * 4);
+ igt_assert_eq(ret, 0);
+ igt_assert(tiling == I915_TILING_X);
+
+ /* Fill the BO with dwords starting at start_val */
+ for (i = 0; i < width * height; i++)
+ linear[i] = start_val++;
+
+ gem_write(fd, bo->handle, 0, linear, sizeof(linear));
+
+ return bo;
+}
+
+static void
+check_bo(int fd, drm_intel_bo *bo, uint32_t start_val)
+{
+ int i;
+
+ gem_read(fd, bo->handle, 0, linear, sizeof(linear));
+
+ for (i = 0; i < width * height; i++) {
+ igt_assert_f(linear[i] == start_val,
+ "Expected 0x%08x, found 0x%08x "
+ "at offset 0x%08x\n",
+ start_val, linear[i], i * 4);
+ start_val++;
+ }
+}
+
+static void run_test(int fd, int count)
+{
+ drm_intel_bo **bo;
+ uint32_t *bo_start_val;
+ uint32_t start = 0;
+ int i;
+
+ count |= 1;
+ igt_info("Using %d 1MiB buffers\n", count);
+
+ bo = malloc(count * sizeof(*bo));
+ bo_start_val = malloc(count * sizeof(*bo_start_val));
+ igt_assert(bo && bo_start_val);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+ for (i = 0; i < count; i++) {
+ bo[i] = create_bo(fd, start);
+ bo_start_val[i] = start;
+
+ /*
+ igt_info("Creating bo %d\n", i);
+ check_bo(bo[i], bo_start_val[i]);
+ */
+
+ start += width * height;
+ }
+
+ for (i = 0; i < count; i++) {
+ int src = count - i - 1;
+ intel_copy_bo(batch, bo[i], bo[src], bo_size);
+ bo_start_val[i] = bo_start_val[src];
+ }
+
+ for (i = 0; i < count * 4; i++) {
+ int src = random() % count;
+ int dst = random() % count;
+
+ if (src == dst)
+ continue;
+
+ intel_copy_bo(batch, bo[dst], bo[src], bo_size);
+ bo_start_val[dst] = bo_start_val[src];
+
+ /*
+ check_bo(bo[dst], bo_start_val[dst]);
+ igt_info("%d: copy bo %d to %d\n", i, src, dst);
+ */
+ }
+
+ for (i = 0; i < count; i++) {
+ /*
+ igt_info("check %d\n", i);
+ */
+ check_bo(fd, bo[i], bo_start_val[i]);
+
+ drm_intel_bo_unreference(bo[i]);
+ bo[i] = NULL;
+ }
+
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ free(bo_start_val);
+ free(bo);
+}
+
+#define MAX_32b ((1ull << 32) - 4096)
+
+igt_main
+{
+ int fd;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+ }
+
+ igt_subtest("basic") {
+ run_test(fd, 2);
+ }
+
+ /* the rest of the tests are too long for simulation */
+ igt_skip_on_simulation();
+
+ igt_subtest("normal") {
+ uint64_t count;
+
+ count = gem_aperture_size(fd);
+ if (count >> 32)
+ count = MAX_32b;
+ count = 3 * count / bo_size / 2;
+ intel_require_memory(count, bo_size, CHECK_RAM);
+ run_test(fd, count);
+ }
+
+ close(fd);
+}
diff --git a/tests/i915/gem_tiled_partial_pwrite_pread.c b/tests/i915/gem_tiled_partial_pwrite_pread.c
new file mode 100644
index 00000000..c2f44c9d
--- /dev/null
+++ b/tests/i915/gem_tiled_partial_pwrite_pread.c
@@ -0,0 +1,311 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+
+IGT_TEST_DESCRIPTION("Test pwrite/pread consistency when touching partial"
+ " cachelines.");
+
+/*
+ * Testcase: pwrite/pread consistency when touching partial cachelines
+ *
+ * Some fancy new pwrite/pread optimizations clflush in-line while
+ * reading/writing. Check whether all required clflushes happen.
+ *
+ * Unfortunately really old mesa used unaligned pread/pwrite for s/w fallback
+ * rendering, so we need to check whether this works on tiled buffers, too.
+ *
+ */
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+drm_intel_bo *scratch_bo;
+drm_intel_bo *staging_bo;
+drm_intel_bo *tiled_staging_bo;
+unsigned long scratch_pitch;
+#define BO_SIZE (32*4096)
+uint32_t devid;
+int fd;
+
+static void
+copy_bo(drm_intel_bo *src, int src_tiled,
+ drm_intel_bo *dst, int dst_tiled)
+{
+ unsigned long dst_pitch = scratch_pitch;
+ unsigned long src_pitch = scratch_pitch;
+ uint32_t cmd_bits = 0;
+
+ /* dst is tiled ... */
+ if (intel_gen(devid) >= 4 && dst_tiled) {
+ dst_pitch /= 4;
+ cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
+ }
+
+ if (intel_gen(devid) >= 4 && src_tiled) {
+ src_pitch /= 4;
+ cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
+ }
+
+ BLIT_COPY_BATCH_START(cmd_bits);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ dst_pitch);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH(BO_SIZE/scratch_pitch << 16 | 1024);
+ OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH(src_pitch);
+ OUT_RELOC_FENCED(src, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_flush(batch);
+}
+
+static void
+blt_bo_fill(drm_intel_bo *tmp_bo, drm_intel_bo *bo, int val)
+{
+ uint8_t *gtt_ptr;
+ int i;
+
+ drm_intel_gem_bo_map_gtt(tmp_bo);
+ gtt_ptr = tmp_bo->virtual;
+
+ for (i = 0; i < BO_SIZE; i++)
+ gtt_ptr[i] = val;
+
+ drm_intel_gem_bo_unmap_gtt(tmp_bo);
+
+ igt_drop_caches_set(fd, DROP_BOUND);
+
+ copy_bo(tmp_bo, 0, bo, 1);
+}
+
+#define MAX_BLT_SIZE 128
+#define ROUNDS 200
+uint8_t tmp[BO_SIZE];
+uint8_t compare_tmp[BO_SIZE];
+
+static void test_partial_reads(void)
+{
+ int i, j;
+
+ for (i = 0; i < ROUNDS; i++) {
+ int start, len;
+ int val = i % 256;
+
+ blt_bo_fill(staging_bo, scratch_bo, i);
+
+ start = random() % BO_SIZE;
+ len = random() % (BO_SIZE-start) + 1;
+
+ drm_intel_bo_get_subdata(scratch_bo, start, len, tmp);
+ for (j = 0; j < len; j++) {
+ igt_assert_f(tmp[j] == val,
+ "mismatch at %i, got: %i, expected: %i\n",
+ start + j, tmp[j], val);
+ }
+
+ igt_progress("partial reads test: ", i, ROUNDS);
+ }
+}
+
+static void test_partial_writes(void)
+{
+ int i, j;
+
+ for (i = 0; i < ROUNDS; i++) {
+ int start, len;
+ int val = i % 256;
+
+ blt_bo_fill(staging_bo, scratch_bo, i);
+
+ start = random() % BO_SIZE;
+ len = random() % (BO_SIZE-start) + 1;
+
+ memset(tmp, i + 63, BO_SIZE);
+
+ drm_intel_bo_subdata(scratch_bo, start, len, tmp);
+
+ copy_bo(scratch_bo, 1, tiled_staging_bo, 1);
+ drm_intel_bo_get_subdata(tiled_staging_bo, 0, BO_SIZE,
+ compare_tmp);
+
+ for (j = 0; j < start; j++) {
+ igt_assert_f(compare_tmp[j] == val,
+ "mismatch at %i, got: %i, expected: %i\n",
+ j, compare_tmp[j], val);
+ }
+ for (; j < start + len; j++) {
+ igt_assert_f(compare_tmp[j] == tmp[0],
+ "mismatch at %i, got: %i, expected: %i\n",
+ j, compare_tmp[j], tmp[0]);
+ }
+ for (; j < BO_SIZE; j++) {
+ igt_assert_f(compare_tmp[j] == val,
+ "mismatch at %i, got: %i, expected: %i\n",
+ j, compare_tmp[j], val);
+ }
+ drm_intel_gem_bo_unmap_gtt(staging_bo);
+
+ igt_progress("partial writes test: ", i, ROUNDS);
+ }
+}
+
+static void test_partial_read_writes(void)
+{
+ int i, j;
+
+ for (i = 0; i < ROUNDS; i++) {
+ int start, len;
+ int val = i % 256;
+
+ blt_bo_fill(staging_bo, scratch_bo, i);
+
+ /* partial read */
+ start = random() % BO_SIZE;
+ len = random() % (BO_SIZE-start) + 1;
+
+ drm_intel_bo_get_subdata(scratch_bo, start, len, tmp);
+ for (j = 0; j < len; j++) {
+ igt_assert_f(tmp[j] == val,
+ "mismatch in read at %i, got: %i, expected: %i\n",
+ start + j, tmp[j], val);
+ }
+
+ /* Change contents through gtt to make the pread cachelines
+ * stale. */
+ val = (i + 17) % 256;
+ blt_bo_fill(staging_bo, scratch_bo, val);
+
+ /* partial write */
+ start = random() % BO_SIZE;
+ len = random() % (BO_SIZE-start) + 1;
+
+ memset(tmp, i + 63, BO_SIZE);
+
+ drm_intel_bo_subdata(scratch_bo, start, len, tmp);
+
+ copy_bo(scratch_bo, 1, tiled_staging_bo, 1);
+ drm_intel_bo_get_subdata(tiled_staging_bo, 0, BO_SIZE,
+ compare_tmp);
+
+ for (j = 0; j < start; j++) {
+ igt_assert_f(compare_tmp[j] == val,
+ "mismatch at %i, got: %i, expected: %i\n",
+ j, compare_tmp[j], val);
+ }
+ for (; j < start + len; j++) {
+ igt_assert_f(compare_tmp[j] == tmp[0],
+ "mismatch at %i, got: %i, expected: %i\n",
+ j, compare_tmp[j], tmp[0]);
+ }
+ for (; j < BO_SIZE; j++) {
+ igt_assert_f(compare_tmp[j] == val,
+ "mismatch at %i, got: %i, expected: %i\n",
+ j, compare_tmp[j], val);
+ }
+ drm_intel_gem_bo_unmap_gtt(staging_bo);
+
+ igt_progress("partial read/writes test: ", i, ROUNDS);
+ }
+}
+
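+/*
+ * Only proceed when the swizzle reported to userspace matches the physical
+ * swizzle, i.e. when the tiled layout is predictable from the CPU side.
+ */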
+static bool known_swizzling(uint32_t handle)
+{
+ struct drm_i915_gem_get_tiling arg = {
+ .handle = handle,
+ };
+
+ if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &arg))
+ return false;
+
+ return arg.phys_swizzle_mode == arg.swizzle_mode;
+}
+
+igt_main
+{
+ uint32_t tiling_mode = I915_TILING_X;
+
+ igt_skip_on_simulation();
+
+ srandom(0xdeadbeef);
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ //drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ devid = intel_get_drm_devid(fd);
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+ /* overallocate the buffers we're actually using because */
+ scratch_bo = drm_intel_bo_alloc_tiled(bufmgr, "scratch bo", 1024,
+ BO_SIZE/4096, 4,
+ &tiling_mode, &scratch_pitch, 0);
+ igt_assert(tiling_mode == I915_TILING_X);
+ igt_assert(scratch_pitch == 4096);
+
+ /*
+ * As we want to compare our template tiled pattern against
+ * the target bo, we need consistent swizzling on both.
+ */
+ igt_require(known_swizzling(scratch_bo->handle));
+ staging_bo = drm_intel_bo_alloc(bufmgr, "staging bo", BO_SIZE, 4096);
+ tiled_staging_bo = drm_intel_bo_alloc_tiled(bufmgr, "scratch bo", 1024,
+ BO_SIZE/4096, 4,
+ &tiling_mode,
+ &scratch_pitch, 0);
+ }
+
+ igt_subtest("reads")
+ test_partial_reads();
+
+ igt_subtest("writes")
+ test_partial_writes();
+
+ igt_subtest("writes-after-reads")
+ test_partial_read_writes();
+
+ igt_fixture {
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_tiled_pread_basic.c b/tests/i915/gem_tiled_pread_basic.c
new file mode 100644
index 00000000..425bb07e
--- /dev/null
+++ b/tests/i915/gem_tiled_pread_basic.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_tiled_pread_basic.c
+ *
+ * This is a test of pread's behavior on tiled objects with respect to the
+ * reported swizzling value.
+ *
+ * The goal is to exercise the slow_bit17_copy path for reading on bit17
+ * machines, but will also be useful for catching swizzling value bugs on
+ * other systems.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+
+IGT_TEST_DESCRIPTION("Test pread behavior on tiled objects with respect to the"
+ " reported swizzling value.");
+
+#define WIDTH 512
+#define HEIGHT 512
+static uint32_t linear[WIDTH * HEIGHT];
+
+#define PAGE_SIZE 4096
+
+static int tile_width;
+static int tile_height;
+static int tile_size;
+
+static uint32_t
+create_bo(int fd)
+{
+ uint32_t handle;
+ uint32_t *data;
+ int i;
+
+ handle = gem_create(fd, sizeof(linear));
+ gem_set_tiling(fd, handle, I915_TILING_X, WIDTH * sizeof(uint32_t));
+
+ /* Fill the BO with dwords starting at start_val */
+ data = gem_mmap__gtt(fd, handle, sizeof(linear),
+ PROT_READ | PROT_WRITE);
+ for (i = 0; i < WIDTH*HEIGHT; i++)
+ data[i] = i;
+ munmap(data, sizeof(linear));
+
+ return handle;
+}
+
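+/*
+ * Extract the requested address bit and move it down to bit 6, so it can be
+ * XORed into the offset to mirror the hardware's bit-6 swizzling.
+ */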
+static int
+swizzle_bit(int bit, int offset)
+{
+ return (offset & (1 << bit)) >> (bit - 6);
+}
+
+struct offset {
+ int base_x, base_y;
+ int tile_x, tile_y;
+};
+
+/* Translate from a swizzled offset in the tiled buffer to the corresponding
+ * value from the original linear buffer.
+ */
+static uint32_t
+calculate_expected(int offset, struct offset *dbg)
+{
+ int tile_off = offset & (tile_size - 1);
+ int tile_base = offset & -tile_size;
+ int tile_index = tile_base / tile_size;
+ int tiles_per_row = 4*WIDTH / tile_width;
+
+ /* base x,y values from the tile (page) index. */
+ dbg->base_y = tile_index / tiles_per_row * tile_height;
+ dbg->base_x = tile_index % tiles_per_row * (tile_width/4);
+
+ /* x, y offsets within the tile */
+ dbg->tile_y = tile_off / tile_width;
+ dbg->tile_x = (tile_off % tile_width) / 4;
+
+ return (dbg->base_y + dbg->tile_y) * WIDTH + dbg->base_x + dbg->tile_x;
+}
+
+igt_simple_main
+{
+ int fd;
+ int i, iter = 100;
+ uint32_t tiling, swizzle;
+ uint32_t handle;
+ uint32_t devid;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ handle = create_bo(fd);
+ igt_require(gem_get_tiling(fd, handle, &tiling, &swizzle));
+
+ devid = intel_get_drm_devid(fd);
+
+ if (IS_GEN2(devid)) {
+ tile_height = 16;
+ tile_width = 128;
+ tile_size = 2048;
+ } else {
+ tile_height = 8;
+ tile_width = 512;
+ tile_size = PAGE_SIZE;
+ }
+
+ /* Read a bunch of random subsets of the data and check that they come
+ * out right.
+ */
+ for (i = 0; i < iter; i++) {
+ int size = WIDTH * HEIGHT * 4;
+ int offset = (random() % size) & ~3;
+ int len = (random() % size) & ~3;
+ int j;
+
+ if (len == 0)
+ len = 4;
+
+ if (offset + len > size)
+ len = size - offset;
+
+ if (i == 0) {
+ offset = 0;
+ len = size;
+ }
+
+ gem_read(fd, handle, offset, linear, len);
+
+ /* Translate from offsets in the read buffer to the swizzled
+ * address that it corresponds to. This is the opposite of
+ * what Mesa does (calculate offset to be read given the linear
+ * offset it's looking for).
+ */
+ for (j = offset; j < offset + len; j += 4) {
+ struct offset dbg;
+ uint32_t expected, found;
+ int swizzled_offset;
+ const char *swizzle_str;
+
+ switch (swizzle) {
+ case I915_BIT_6_SWIZZLE_NONE:
+ swizzled_offset = j;
+ swizzle_str = "none";
+ break;
+ case I915_BIT_6_SWIZZLE_9:
+ swizzled_offset = j ^
+ swizzle_bit(9, j);
+ swizzle_str = "bit9";
+ break;
+ case I915_BIT_6_SWIZZLE_9_10:
+ swizzled_offset = j ^
+ swizzle_bit(9, j) ^
+ swizzle_bit(10, j);
+ swizzle_str = "bit9^10";
+ break;
+ case I915_BIT_6_SWIZZLE_9_11:
+ swizzled_offset = j ^
+ swizzle_bit(9, j) ^
+ swizzle_bit(11, j);
+ swizzle_str = "bit9^11";
+ break;
+ case I915_BIT_6_SWIZZLE_9_10_11:
+ swizzled_offset = j ^
+ swizzle_bit(9, j) ^
+ swizzle_bit(10, j) ^
+ swizzle_bit(11, j);
+ swizzle_str = "bit9^10^11";
+ break;
+ default:
+ igt_assert_f(0, "Bad swizzle bits; %d\n",
+ swizzle);
+ }
+ expected = calculate_expected(swizzled_offset, &dbg);
+ found = linear[(j - offset) / 4];
+ igt_assert_f(expected == found,
+ "Bad read [%d]: %d instead of %d at 0x%08x "
+ "[tile (%d, %d) subtile (%d, %d)] "
+ "for read from 0x%08x to 0x%08x, swizzle=%s\n",
+ i, found, expected, j,
+ dbg.base_x, dbg.base_y,
+ dbg.tile_x, dbg.tile_y,
+ offset, offset + len,
+ swizzle_str);
+ }
+ }
+
+ close(fd);
+}
diff --git a/tests/i915/gem_tiled_pread_pwrite.c b/tests/i915/gem_tiled_pread_pwrite.c
new file mode 100644
index 00000000..313daa38
--- /dev/null
+++ b/tests/i915/gem_tiled_pread_pwrite.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_tiled_pread_pwrite.c
+ *
+ * This is a test of pread and pwrite behavior on tiled objects with
+ * respect to the reported swizzling value.
+ *
+ * The goal is to exercise the slow_bit17_copy path for reading on bit17
+ * machines, but will also be useful for catching swizzling value bugs on
+ * other systems.
+ */
+
+/*
+ * Testcase: Test swizzling by testing pwrite does the inverse of pread
+ *
+ * Together with the explicit pread testcase, this should cover our swizzle
+ * handling.
+ *
+ * Note that this test will use swap in an effort to test all of ram.
+ */
+
+#include "igt.h"
+#include "igt_x86.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+
+#include <drm.h>
+
+
+IGT_TEST_DESCRIPTION("Test swizzling by testing pwrite does the inverse of"
+ " pread.");
+
+#define WIDTH 512
+#define HEIGHT 512
+static uint32_t linear[WIDTH * HEIGHT];
+static uint32_t current_tiling_mode;
+
+#define PAGE_SIZE 4096
+
+static uint32_t
+create_bo_and_fill(int fd)
+{
+ uint32_t handle;
+ uint32_t *data;
+ int i;
+
+ handle = gem_create(fd, sizeof(linear));
+ gem_set_tiling(fd, handle, current_tiling_mode, WIDTH * sizeof(uint32_t));
+
+ /* Fill the BO with dwords starting at start_val */
+ data = gem_mmap__gtt(fd, handle, sizeof(linear),
+ PROT_READ | PROT_WRITE);
+ for (i = 0; i < WIDTH*HEIGHT; i++)
+ data[i] = i;
+ munmap(data, sizeof(linear));
+
+ return handle;
+}
+
+static uint32_t
+create_bo(int fd)
+{
+ uint32_t handle;
+
+ handle = gem_create(fd, sizeof(linear));
+ gem_set_tiling(fd, handle, current_tiling_mode, WIDTH * sizeof(uint32_t));
+
+ return handle;
+}
+
+static void copy_wc_page(void *dst, const void *src)
+{
+ igt_memcpy_from_wc(dst, src, PAGE_SIZE);
+}
+
+igt_simple_main
+{
+ uint32_t tiling, swizzle;
+ int count;
+ int fd;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ count = SLOW_QUICK(intel_get_total_ram_mb() * 9 / 10, 8);
+
+ for (int i = 0; i < count/2; i++) {
+ uint32_t handle, handle_target;
+ char *data;
+ int n;
+
+ current_tiling_mode = I915_TILING_X;
+
+ handle = create_bo_and_fill(fd);
+ gem_get_tiling(fd, handle, &tiling, &swizzle);
+
+ gem_read(fd, handle, 0, linear, sizeof(linear));
+
+ handle_target = create_bo(fd);
+ gem_write(fd, handle_target, 0, linear, sizeof(linear));
+
+ /* Check the target bo's contents. */
+ data = gem_mmap__gtt(fd, handle_target, sizeof(linear), PROT_READ);
+ n = 0;
+ for (int pfn = 0; pfn < sizeof(linear)/PAGE_SIZE; pfn++) {
+ uint32_t page[PAGE_SIZE/sizeof(uint32_t)];
+ copy_wc_page(page, data + PAGE_SIZE*pfn);
+ for (int j = 0; j < PAGE_SIZE/sizeof(uint32_t); j++) {
+ igt_assert_f(page[j] == n,
+ "mismatch at %i: %i\n",
+ n, page[j]);
+ n++;
+ }
+ }
+ munmap(data, sizeof(linear));
+
+ /* Leak both bos so that we use all of system mem! */
+ gem_madvise(fd, handle_target, I915_MADV_DONTNEED);
+ gem_madvise(fd, handle, I915_MADV_DONTNEED);
+
+ igt_progress("gem_tiled_pread_pwrite: ", i, count/2);
+ }
+
+ close(fd);
+}
diff --git a/tests/i915/gem_tiled_swapping.c b/tests/i915/gem_tiled_swapping.c
new file mode 100644
index 00000000..dce66806
--- /dev/null
+++ b/tests/i915/gem_tiled_swapping.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/** @file gem_tiled_swapping.c
+ *
+ * This is a test of the swizzle handling applied to tiled objects when they
+ * are swapped out and back in.
+ *
+ * The goal is to exercise the swizzle checks in the swap-in path, which sit
+ * in a different place than the ones used for pread/pwrite and therefore
+ * need separate coverage.
+ */
+
+/*
+ * Testcase: Exercise swizzle code for swapping
+ *
+ * The swizzle checks in the swapin path are at a different place than the ones
+ * for pread/pwrite, so we need to check them separately.
+ *
+ * This test obviously needs swap present (and exits if none is detected).
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <pthread.h>
+
+#include <drm.h>
+
+
+IGT_TEST_DESCRIPTION("Exercise swizzle code for swapping.");
+
+#define WIDTH 512
+#define HEIGHT 512
+#define LINEAR_DWORDS (4 * WIDTH * HEIGHT)
+static uint32_t current_tiling_mode;
+
+#define PAGE_SIZE 4096
+#define AVAIL_RAM 512
+
+static uint32_t
+create_bo(int fd)
+{
+ uint32_t handle;
+ uint32_t *data;
+
+ handle = gem_create(fd, LINEAR_DWORDS);
+ gem_set_tiling(fd, handle, current_tiling_mode, WIDTH * sizeof(uint32_t));
+
+ data = __gem_mmap__gtt(fd, handle, LINEAR_DWORDS, PROT_READ | PROT_WRITE);
+ if (data == NULL) {
+ gem_close(fd, handle);
+ return 0;
+ }
+ munmap(data, LINEAR_DWORDS);
+
+ return handle;
+}
+
+static void
+fill_bo(int fd, uint32_t handle)
+{
+ uint32_t *data;
+ int i;
+
+ data = gem_mmap__gtt(fd, handle, LINEAR_DWORDS,
+ PROT_READ | PROT_WRITE);
+
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ for (i = 0; i < WIDTH*HEIGHT; i++)
+ data[i] = i;
+ munmap(data, LINEAR_DWORDS);
+}
+
+static void
+check_bo(int fd, uint32_t handle)
+{
+ uint32_t *data;
+ int j;
+
+ data = gem_mmap__gtt(fd, handle, LINEAR_DWORDS, PROT_READ);
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, 0);
+ j = rand() % (WIDTH * HEIGHT);
+ igt_assert_f(data[j] == j, "mismatch at %i: %i\n", j, data[j]);
+ munmap(data, LINEAR_DWORDS);
+}
+
+uint32_t *bo_handles;
+
+struct thread {
+ pthread_t thread;
+ int *idx_arr;
+ int fd, count;
+};
+
+static void *thread_run(void *data)
+{
+ struct thread *t = data;
+ int i;
+
+ for (i = 0; i < t->count; i++)
+ check_bo(t->fd, bo_handles[t->idx_arr[i]]);
+
+ return NULL;
+}
+
+static void thread_init(struct thread *t, int fd, int count)
+{
+ int i;
+
+ t->fd = fd;
+ t->count = count;
+ t->idx_arr = calloc(count, sizeof(int));
+ igt_assert(t->idx_arr);
+
+ for (i = 0; i < count; i++)
+ t->idx_arr[i] = i;
+
+ igt_permute_array(t->idx_arr, count, igt_exchange_int);
+}
+
+static void thread_fini(struct thread *t)
+{
+ free(t->idx_arr);
+}
+
+static void check_memory_layout(int fd)
+{
+ igt_skip_on_f(igt_debugfs_search(fd, "i915_swizzle_info", "L-shaped"),
+ "L-shaped memory configuration detected\n");
+
+ igt_debug("normal memory configuration detected, continuing\n");
+}
+
+igt_main
+{
+ struct thread *threads;
+ int fd, n, count, num_threads;
+
+ igt_fixture {
+ size_t lock_size;
+
+ current_tiling_mode = I915_TILING_X;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ intel_purge_vm_caches(fd);
+ check_memory_layout(fd);
+
+ /* lock RAM, leaving only 512MB available */
+ lock_size = max(0, intel_get_total_ram_mb() - AVAIL_RAM);
+ igt_lock_mem(lock_size);
+
+ /* need slightly more than available memory */
+ count = min(intel_get_total_ram_mb(), AVAIL_RAM) * 1.25;
+ bo_handles = calloc(count, sizeof(uint32_t));
+ igt_assert(bo_handles);
+
+ num_threads = gem_available_fences(fd);
+ threads = calloc(num_threads, sizeof(struct thread));
+ igt_assert(threads);
+
+ igt_info("Using %d 1MiB objects (available RAM: %ld/%ld, swap: %ld)\n",
+ count,
+ (long)intel_get_avail_ram_mb(),
+ (long)intel_get_total_ram_mb(),
+ (long)intel_get_total_swap_mb());
+ intel_require_memory(count, 1024*1024, CHECK_RAM | CHECK_SWAP);
+
+ for (n = 0; n < count; n++) {
+ bo_handles[n] = create_bo(fd);
+ /* Not enough mmap address space possible. */
+ igt_require(bo_handles[n]);
+ }
+ }
+
+ igt_subtest("non-threaded") {
+ for (n = 0; n < count; n++)
+ fill_bo(fd, bo_handles[n]);
+
+ thread_init(&threads[0], fd, count);
+ thread_run(&threads[0]);
+ thread_run(&threads[0]);
+ thread_run(&threads[0]);
+ thread_fini(&threads[0]);
+ }
+
+ /* Once more with threads */
+ igt_subtest("threaded") {
+ for (n = 0; n < count; n++)
+ fill_bo(fd, bo_handles[n]);
+
+ for (n = 0; n < num_threads; n++)
+ thread_init(&threads[n], fd, count);
+
+ thread_run(&threads[0]);
+ for (n = 0; n < num_threads; n++)
+ pthread_create(&threads[n].thread, NULL, thread_run, &threads[n]);
+ for (n = 0; n < num_threads; n++)
+ pthread_join(threads[n].thread, NULL);
+ thread_run(&threads[0]);
+
+ for (n = 0; n < num_threads; n++)
+ thread_fini(&threads[n]);
+ }
+
+ close(fd);
+}
diff --git a/tests/i915/gem_tiled_wb.c b/tests/i915/gem_tiled_wb.c
new file mode 100644
index 00000000..67d54bd3
--- /dev/null
+++ b/tests/i915/gem_tiled_wb.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file gem_tiled_wb.c
+ *
+ * This is a test of write-back (CPU) mmap behavior on tiled objects
+ * with respect to the reported swizzling value.
+ *
+ * The goal is to exercise the complications that arise when using a linear
+ * view of a tiled object that is subject to hardware swizzling. This is
+ * useful to check that we are presenting the correct view of the object
+ * to userspace, and that userspace has to respect the swizzle.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION("This is a test of write-combining mmap's behavior on"
+ " tiled objects with respect to the reported swizzling"
+ " value.");
+
+#define WIDTH 512
+#define HEIGHT 512
+#define SIZE (WIDTH*HEIGHT*sizeof(uint32_t))
+
+#define PAGE_SIZE 4096
+
+static int tile_width;
+static int tile_height;
+static int tile_size;
+
+static uint32_t
+create_bo(int fd)
+{
+ uint32_t handle;
+ uint32_t *data;
+ int i;
+
+ handle = gem_create(fd, SIZE);
+ gem_set_tiling(fd, handle, I915_TILING_X, WIDTH * sizeof(uint32_t));
+
+ /* Write through the fence to tile the data.
+ * We then manually detile on reading back through the CPU mmap.
+ */
+ data = gem_mmap__gtt(fd, handle, SIZE, PROT_READ | PROT_WRITE);
+ for (i = 0; i < WIDTH*HEIGHT; i++)
+ data[i] = i;
+ munmap(data, SIZE);
+
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, 0);
+ return handle;
+}
+
+static int
+swizzle_bit(int bit, int offset)
+{
+ return (offset & (1 << bit)) >> (bit - 6);
+}
+
+/* Translate from a swizzled offset in the tiled buffer to the corresponding
+ * value from the original linear buffer.
+ */
+static uint32_t
+calculate_expected(int offset)
+{
+ int tile_off = offset & (tile_size - 1);
+ int tile_base = offset & -tile_size;
+ int tile_index = tile_base / tile_size;
+ int tiles_per_row = 4*WIDTH / tile_width;
+
+ /* base x,y values from the tile (page) index. */
+ int base_y = tile_index / tiles_per_row * tile_height;
+ int base_x = tile_index % tiles_per_row * (tile_width/4);
+
+ /* x, y offsets within the tile */
+ int tile_y = tile_off / tile_width;
+ int tile_x = (tile_off % tile_width) / 4;
+
+ igt_debug("%3d, %3d, %3d,%3d\n", base_x, base_y, tile_x, tile_y);
+ return (base_y + tile_y) * WIDTH + base_x + tile_x;
+}
+
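+/*
+ * Query tiling through a local definition of the extended get_tiling ioctl
+ * so that phys_swizzle_mode is available even with older kernel headers;
+ * the test is skipped when it differs from the reported swizzle.
+ */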
+static void
+get_tiling(int fd, uint32_t handle, uint32_t *tiling, uint32_t *swizzle)
+{
+ struct drm_i915_gem_get_tiling2 {
+ uint32_t handle;
+ uint32_t tiling_mode;
+ uint32_t swizzle_mode;
+ uint32_t phys_swizzle_mode;
+ } arg;
+#define DRM_IOCTL_I915_GEM_GET_TILING2 DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling2)
+
+ memset(&arg, 0, sizeof(arg));
+ arg.handle = handle;
+
+ do_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING2, &arg);
+ igt_require(arg.phys_swizzle_mode == arg.swizzle_mode);
+
+ *tiling = arg.tiling_mode;
+ *swizzle = arg.swizzle_mode;
+}
+
+igt_simple_main
+{
+ int fd;
+ int i, iter = 100;
+ uint32_t tiling, swizzle;
+ uint32_t handle;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ handle = create_bo(fd);
+ get_tiling(fd, handle, &tiling, &swizzle);
+
+ if (IS_GEN2(intel_get_drm_devid(fd))) {
+ tile_height = 16;
+ tile_width = 128;
+ tile_size = 2048;
+ } else {
+ tile_height = 8;
+ tile_width = 512;
+ tile_size = PAGE_SIZE;
+ }
+
+ /* Read a bunch of random subsets of the data and check that they come
+ * out right.
+ */
+ for (i = 0; i < iter; i++) {
+ int size = WIDTH * HEIGHT * 4;
+ int offset = (random() % size) & ~3;
+ int len = (random() % size) & ~3;
+ int first_page, last_page;
+ uint32_t *linear;
+ int j;
+
+ if (len == 0)
+ len = 4;
+
+ if (offset + len > size)
+ len = size - offset;
+
+ if (i == 0) {
+ offset = 0;
+ len = size;
+ }
+
+ first_page = offset & ~(PAGE_SIZE-1);
+ last_page = (offset + len + PAGE_SIZE) & ~(PAGE_SIZE-1);
+ offset -= first_page;
+
+ linear = gem_mmap__cpu(fd, handle, first_page, last_page - first_page, PROT_READ);
+
+ /* Translate from offsets in the read buffer to the swizzled
+ * address that it corresponds to. This is the opposite of
+ * what Mesa does (calculate offset to be read given the linear
+ * offset it's looking for).
+ */
+ for (j = offset; j < offset + len; j += 4) {
+ uint32_t expected_val, found_val;
+ int swizzled_offset = j + first_page;
+ const char *swizzle_str;
+
+ switch (swizzle) {
+ case I915_BIT_6_SWIZZLE_NONE:
+ swizzle_str = "none";
+ break;
+ case I915_BIT_6_SWIZZLE_9:
+ swizzled_offset ^=
+ swizzle_bit(9, swizzled_offset);
+ swizzle_str = "bit9";
+ break;
+ case I915_BIT_6_SWIZZLE_9_10:
+ swizzled_offset ^=
+ swizzle_bit(9, swizzled_offset) ^
+ swizzle_bit(10, swizzled_offset);
+ swizzle_str = "bit9^10";
+ break;
+ case I915_BIT_6_SWIZZLE_9_11:
+ swizzled_offset ^=
+ swizzle_bit(9, swizzled_offset) ^
+ swizzle_bit(11, swizzled_offset);
+ swizzle_str = "bit9^11";
+ break;
+ case I915_BIT_6_SWIZZLE_9_10_11:
+ swizzled_offset ^=
+ swizzle_bit(9, swizzled_offset) ^
+ swizzle_bit(10, swizzled_offset) ^
+ swizzle_bit(11, swizzled_offset);
+ swizzle_str = "bit9^10^11";
+ break;
+ default:
+ igt_skip("unknown swizzling");
+ break;
+ }
+ expected_val = calculate_expected(swizzled_offset);
+ found_val = linear[j / 4];
+ igt_assert_f(expected_val == found_val,
+ "Bad read [%d]: %d instead of %d at 0x%08x "
+ "for read from 0x%08x to 0x%08x, swizzle=%s\n",
+ i, found_val, expected_val, j + first_page,
+ offset, offset + len,
+ swizzle_str);
+ }
+ munmap(linear, last_page - first_page);
+ }
+
+ close(fd);
+}
diff --git a/tests/i915/gem_tiled_wc.c b/tests/i915/gem_tiled_wc.c
new file mode 100644
index 00000000..65ac3851
--- /dev/null
+++ b/tests/i915/gem_tiled_wc.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file gem_tiled_wc.c
+ *
+ * This is a test of write-combining mmap's behavior on tiled objects
+ * with respect to the reported swizzling value.
+ *
+ * The goal is to exercise the complications that arise when using a linear
+ * view of a tiled object that is subject to hardware swizzling. This is
+ * useful to check that we are presenting the correct view of the object
+ * to userspace, and that userspace has to respect the swizzle.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+#define WIDTH 512
+#define HEIGHT 512
+#define SIZE (WIDTH*HEIGHT*sizeof(uint32_t))
+
+#define PAGE_SIZE 4096
+
+static int tile_width;
+static int tile_height;
+static int tile_size;
+
+static uint32_t
+create_bo(int fd)
+{
+ uint32_t handle;
+ uint32_t *data;
+ int i;
+
+ handle = gem_create(fd, SIZE);
+ gem_set_tiling(fd, handle, I915_TILING_X, WIDTH * sizeof(uint32_t));
+
+ /* Fill the BO with ascending dwords through the fenced (tiled) GTT mmap */
+ data = gem_mmap__gtt(fd, handle, SIZE, PROT_READ | PROT_WRITE);
+ for (i = 0; i < WIDTH*HEIGHT; i++)
+ data[i] = i;
+ munmap(data, SIZE);
+
+ return handle;
+}
+
+static int
+swizzle_bit(int bit, int offset)
+{
+ return (offset & (1 << bit)) >> (bit - 6);
+}
+
+/* Translate from a swizzled offset in the tiled buffer to the corresponding
+ * value from the original linear buffer.
+ */
+static uint32_t
+calculate_expected(int offset)
+{
+ int tile_off = offset & (tile_size - 1);
+ int tile_base = offset & -tile_size;
+ int tile_index = tile_base / tile_size;
+ int tiles_per_row = 4*WIDTH / tile_width;
+
+ /* base x,y values from the tile (page) index. */
+ int base_y = tile_index / tiles_per_row * tile_height;
+ int base_x = tile_index % tiles_per_row * (tile_width/4);
+
+ /* x, y offsets within the tile */
+ int tile_y = tile_off / tile_width;
+ int tile_x = (tile_off % tile_width) / 4;
+
+ igt_debug("%s(%d): %3d, %3d, %3d,%3d = %d\n",
+ __func__, offset, base_x, base_y, tile_x, tile_y,
+ (base_y + tile_y) * WIDTH + base_x + tile_x);
+ return (base_y + tile_y) * WIDTH + base_x + tile_x;
+}
+
+static void
+get_tiling(int fd, uint32_t handle, uint32_t *tiling, uint32_t *swizzle)
+{
+ struct drm_i915_gem_get_tiling2 {
+ uint32_t handle;
+ uint32_t tiling_mode;
+ uint32_t swizzle_mode;
+ uint32_t phys_swizzle_mode;
+ } arg;
+#define DRM_IOCTL_I915_GEM_GET_TILING2 DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling2)
+
+ memset(&arg, 0, sizeof(arg));
+ arg.handle = handle;
+
+ do_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING2, &arg);
+ igt_require(arg.phys_swizzle_mode == arg.swizzle_mode);
+
+ *tiling = arg.tiling_mode;
+ *swizzle = arg.swizzle_mode;
+}
+
+igt_simple_main
+{
+ int fd;
+ int i, iter = 100;
+ uint32_t tiling, swizzle;
+ uint32_t handle;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ gem_require_mmap_wc(fd);
+
+ handle = create_bo(fd);
+ get_tiling(fd, handle, &tiling, &swizzle);
+
+ if (IS_GEN2(intel_get_drm_devid(fd))) {
+ tile_height = 16;
+ tile_width = 128;
+ tile_size = 2048;
+ } else {
+ tile_height = 8;
+ tile_width = 512;
+ tile_size = PAGE_SIZE;
+ }
+
+ /* Read a bunch of random subsets of the data and check that they come
+ * out right.
+ */
+ for (i = 0; i < iter; i++) {
+ int size = WIDTH * HEIGHT * 4;
+ int offset = (random() % size) & ~3;
+ int len = (random() % size) & ~3;
+ int first_page, last_page;
+ uint32_t *linear;
+ int j;
+
+ if (len == 0)
+ len = 4;
+
+ if (offset + len > size)
+ len = size - offset;
+
+ if (i == 0) {
+ offset = 0;
+ len = size;
+ }
+
+ first_page = offset & ~(PAGE_SIZE-1);
+ last_page = (offset + len + PAGE_SIZE) & ~(PAGE_SIZE-1);
+
+ linear = gem_mmap__wc(fd, handle, first_page, last_page - first_page, PROT_READ);
+
+ /* Translate from offsets in the read buffer to the swizzled
+ * address that it corresponds to. This is the opposite of
+ * what Mesa does (calculate offset to be read given the linear
+ * offset it's looking for).
+ */
+ for (j = offset; j < offset + len; j += 4) {
+ uint32_t expected_val, found_val;
+ int swizzled_offset;
+ const char *swizzle_str;
+
+ switch (swizzle) {
+ case I915_BIT_6_SWIZZLE_NONE:
+ swizzled_offset = j;
+ swizzle_str = "none";
+ break;
+ case I915_BIT_6_SWIZZLE_9:
+ swizzled_offset = j ^
+ swizzle_bit(9, j);
+ swizzle_str = "bit9";
+ break;
+ case I915_BIT_6_SWIZZLE_9_10:
+ swizzled_offset = j ^
+ swizzle_bit(9, j) ^
+ swizzle_bit(10, j);
+ swizzle_str = "bit9^10";
+ break;
+ case I915_BIT_6_SWIZZLE_9_11:
+ swizzled_offset = j ^
+ swizzle_bit(9, j) ^
+ swizzle_bit(11, j);
+ swizzle_str = "bit9^11";
+ break;
+ case I915_BIT_6_SWIZZLE_9_10_11:
+ swizzled_offset = j ^
+ swizzle_bit(9, j) ^
+ swizzle_bit(10, j) ^
+ swizzle_bit(11, j);
+ swizzle_str = "bit9^10^11";
+ break;
+ default:
+ igt_skip("unknown swizzling");
+ break;
+ }
+ igt_debug("Checking offset %d swizzled %s -> %d\n",
+ j, swizzle_str, swizzled_offset);
+ expected_val = calculate_expected(swizzled_offset);
+ found_val = linear[(j - first_page)/ 4];
+ igt_assert_f(expected_val == found_val,
+ "Bad read [%d]: %d instead of %d at 0x%08x "
+ "for read from 0x%08x to 0x%08x, swizzle=%s\n",
+ i, found_val, expected_val, j,
+ offset, offset + len,
+ swizzle_str);
+ }
+ munmap(linear, last_page - first_page);
+ }
+
+ close(fd);
+}
diff --git a/tests/i915/gem_tiling_max_stride.c b/tests/i915/gem_tiling_max_stride.c
new file mode 100644
index 00000000..a6f97a91
--- /dev/null
+++ b/tests/i915/gem_tiling_max_stride.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Ville Syrjälä <ville.syrjala@linux.intel.com>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+IGT_TEST_DESCRIPTION("Check that max fence stride works.");
+
+static void do_test_invalid_tiling(int fd, uint32_t handle, int tiling, int stride)
+{
+ igt_assert(__gem_set_tiling(fd, handle, tiling, tiling ? stride : 0) == -EINVAL);
+}
+
+static void test_invalid_tiling(int fd, uint32_t handle, int stride)
+{
+ do_test_invalid_tiling(fd, handle, I915_TILING_X, stride);
+ do_test_invalid_tiling(fd, handle, I915_TILING_Y, stride);
+}
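For contrast with the rejection checks above, an acceptable stride should simply be applied without error. A small editor-added sketch (hypothetical helper, reusing the non-asserting __gem_set_tiling() wrapper already used above) of probing a candidate stride:

        static bool stride_is_acceptable(int fd, uint32_t handle, int stride)
        {
                /* __gem_set_tiling() returns 0 on success, -errno on failure */
                if (__gem_set_tiling(fd, handle, I915_TILING_X, stride))
                        return false;

                /* return the object to linear so later probes start clean */
                gem_set_tiling(fd, handle, I915_TILING_NONE, 0);
                return true;
        }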
+
+/**
+ * Testcase: Check that max fence stride works
+ */
+
+igt_simple_main
+{
+ int fd;
+ uint32_t *ptr;
+ uint32_t *data;
+ uint32_t handle;
+ uint32_t stride;
+ uint32_t size;
+ uint32_t devid;
+ int i = 0, x, y;
+ int tile_width = 512;
+ int tile_height = 8;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ devid = intel_get_drm_devid(fd);
+
+ if (intel_gen(devid) >= 7)
+ stride = 256 * 1024;
+ else if (intel_gen(devid) >= 4)
+ stride = 128 * 1024;
+ else {
+ if (IS_GEN2(devid)) {
+ tile_width = 128;
+ tile_height = 16;
+ }
+ stride = 8 * 1024;
+ }
+
+ size = stride * tile_height;
+
+ data = malloc(size);
+ igt_assert(data);
+
+ /* Fill each line with the line number */
+ for (y = 0; y < tile_height; y++) {
+ for (x = 0; x < stride / 4; x++)
+ data[i++] = y;
+ }
+
+ handle = gem_create(fd, size);
+
+ ptr = gem_mmap__gtt(fd, handle, size, PROT_READ | PROT_WRITE);
+
+ test_invalid_tiling(fd, handle, 0);
+ test_invalid_tiling(fd, handle, 64);
+ test_invalid_tiling(fd, handle, stride - 1);
+ test_invalid_tiling(fd, handle, stride + 1);
+ test_invalid_tiling(fd, handle, stride + 127);
+ test_invalid_tiling(fd, handle, stride + 128);
+ test_invalid_tiling(fd, handle, stride + tile_width - 1);
+ test_invalid_tiling(fd, handle, stride + tile_width);
+ test_invalid_tiling(fd, handle, stride * 2);
+ test_invalid_tiling(fd, handle, INT_MAX);
+ test_invalid_tiling(fd, handle, UINT_MAX);
+
+ gem_set_tiling(fd, handle, I915_TILING_X, stride);
+
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+ memcpy(ptr, data, size);
+
+ gem_set_tiling(fd, handle, I915_TILING_NONE, 0);
+
+ memcpy(data, ptr, size);
+
+ /* Check that each tile contains the expected pattern */
+ for (i = 0; i < size / 4; ) {
+ for (y = 0; y < tile_height; y++) {
+ for (x = 0; x < tile_width / 4; x++) {
+ igt_assert(y == data[i]);
+ i++;
+ }
+ }
+ }
+
+ munmap(ptr, size);
+
+ close(fd);
+}
diff --git a/tests/i915/gem_unfence_active_buffers.c b/tests/i915/gem_unfence_active_buffers.c
new file mode 100644
index 00000000..b78fbafa
--- /dev/null
+++ b/tests/i915/gem_unfence_active_buffers.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/** @file gem_unfence_active_buffers.c
+ *
+ * Testcase: Check for a use-after-free in the fence stealing code
+ *
+ * If we're stealing the fence of an active object where the active list is the
+ * only thing holding a reference, we need to be careful not to access the old
+ * object we're stealing the fence from after that reference has been dropped by
+ * retire_requests.
+ *
+ * Note that this needs slab poisoning enabled in the kernel to reliably hit the
+ * problem - the race window is too small.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdbool.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("Check for use-after-free in the fence stealing code.");
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+uint32_t devid;
+
+#define TEST_SIZE (1024*1024)
+#define TEST_STRIDE (4*1024)
+
+uint32_t data[TEST_SIZE/4];
+
+igt_simple_main
+{
+ int i, ret, fd, num_fences;
+ drm_intel_bo *busy_bo, *test_bo;
+ uint32_t tiling = I915_TILING_X;
+
+ igt_skip_on_simulation();
+
+ for (i = 0; i < 1024*256; i++)
+ data[i] = i;
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ devid = intel_get_drm_devid(fd);
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+ igt_info("filling ring\n");
+ busy_bo = drm_intel_bo_alloc(bufmgr, "busy bo bo", 16*1024*1024, 4096);
+
+ for (i = 0; i < 250; i++) {
+ BLIT_COPY_BATCH_START(0);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ 2*1024*4);
+ OUT_BATCH(0 << 16 | 1024);
+ OUT_BATCH((2048) << 16 | (2048));
+ OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH(2*1024*4);
+ OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+
+ if (batch->gen >= 6) {
+ BEGIN_BATCH(3, 0);
+ OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
+ }
+ intel_batchbuffer_flush(batch);
+
+ num_fences = gem_available_fences(fd);
+ igt_info("creating havoc on %i fences\n", num_fences);
+
+ for (i = 0; i < num_fences*2; i++) {
+ test_bo = drm_intel_bo_alloc(bufmgr, "test_bo",
+ TEST_SIZE, 4096);
+ ret = drm_intel_bo_set_tiling(test_bo, &tiling, TEST_STRIDE);
+ igt_assert(ret == 0);
+
+ drm_intel_bo_disable_reuse(test_bo);
+
+ BLIT_COPY_BATCH_START(0);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ TEST_STRIDE);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH((1) << 16 | (1));
+ OUT_RELOC_FENCED(test_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH(TEST_STRIDE);
+ OUT_RELOC_FENCED(test_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+ intel_batchbuffer_flush(batch);
+ igt_info("test bo offset: %#lx\n", test_bo->offset);
+
+ drm_intel_bo_unreference(test_bo);
+ }
+
+ /* Launch a few batches to ensure the damaged slab objects get reused. */
+ for (i = 0; i < 10; i++) {
+ BLIT_COPY_BATCH_START(0);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ 2*1024*4);
+ OUT_BATCH(0 << 16 | 1024);
+ OUT_BATCH((1) << 16 | (1));
+ OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(0 << 16 | 0);
+ OUT_BATCH(2*1024*4);
+ OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+
+ if (batch->gen >= 8) {
+ BEGIN_BATCH(3, 0);
+ OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
+ }
+ intel_batchbuffer_flush(batch);
+}
diff --git a/tests/i915/gem_unref_active_buffers.c b/tests/i915/gem_unref_active_buffers.c
new file mode 100644
index 00000000..4fafdd04
--- /dev/null
+++ b/tests/i915/gem_unref_active_buffers.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/*
+ * Testcase: Unreferencing of active buffers
+ *
+ * Execs buffers and immediately unreferences them, hence the kernel active list
+ * will be the last one to hold a reference on them. Usually libdrm bo caching
+ * prevents that by keeping another reference.
+ */
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("Test unreferencing of active buffers.");
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static drm_intel_bo *load_bo;
+
+igt_simple_main
+{
+ int fd, i;
+
+ igt_skip_on_simulation();
+
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ igt_assert(bufmgr);
+ /* don't enable buffer reuse!! */
+ //drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+ batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+ igt_assert(batch);
+
+ /* put some load onto the gpu to keep the light buffers active for long
+ * enough */
+ for (i = 0; i < 1000; i++) {
+ load_bo = drm_intel_bo_alloc(bufmgr, "target bo", 1024*4096, 4096);
+ igt_assert(load_bo);
+
+ BLIT_COPY_BATCH_START(0);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ 4096);
+ OUT_BATCH(0); /* dst x1,y1 */
+ OUT_BATCH((1024 << 16) | 512);
+ OUT_RELOC_FENCED(load_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH((0 << 16) | 512); /* src x1, y1 */
+ OUT_BATCH(4096);
+ OUT_RELOC_FENCED(load_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_flush(batch);
+
+ drm_intel_bo_disable_reuse(load_bo);
+ drm_intel_bo_unreference(load_bo);
+ }
+
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+}
diff --git a/tests/i915/gem_userptr_blits.c b/tests/i915/gem_userptr_blits.c
new file mode 100644
index 00000000..909dd19d
--- /dev/null
+++ b/tests/i915/gem_userptr_blits.c
@@ -0,0 +1,2037 @@
+/*
+ * Copyright © 2009-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ * Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+ *
+ */
+
+/** @file gem_userptr_blits.c
+ *
+ * This is a test of doing many blits using a mixture of normal system pages
+ * and uncached linear buffers, with a working set larger than the
+ * aperture size.
+ *
+ * The goal is to simply ensure the basics work.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <setjmp.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <openssl/sha.h>
+#include <signal.h>
+#include <pthread.h>
+#include <time.h>
+
+#include <linux/memfd.h>
+
+#include "drm.h"
+#include "i915_drm.h"
+
+#include "intel_bufmgr.h"
+
+#include "eviction_common.c"
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#define LOCAL_EXEC_OBJECT_SUPPORTS_48B (1 << 3)
+
+static uint32_t userptr_flags = LOCAL_I915_USERPTR_UNSYNCHRONIZED;
+
+#define WIDTH 512
+#define HEIGHT 512
+
+static uint32_t linear[WIDTH*HEIGHT];
+
+static void gem_userptr_test_unsynchronized(void)
+{
+ userptr_flags = LOCAL_I915_USERPTR_UNSYNCHRONIZED;
+}
+
+static void gem_userptr_test_synchronized(void)
+{
+ userptr_flags = 0;
+}
+
+static void gem_userptr_sync(int fd, uint32_t handle)
+{
+ gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+}
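Editor's note: the subtests below all build on the same basic userptr flow, sketched here for readers of the patch (illustrative only; it reuses gem_userptr(), PAGE_SIZE and userptr_flags from this file):

        static void userptr_example(int fd)
        {
                void *ptr;
                uint32_t handle;

                /* backing storage must be page aligned and a whole number of pages */
                igt_assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0);
                memset(ptr, 0xc5, PAGE_SIZE);

                /* wrap the malloced pages in a GEM handle */
                gem_userptr(fd, ptr, PAGE_SIZE, 0, userptr_flags, &handle);

                /* the handle is now usable like any other BO */
                gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);

                /* afterwards the pages are still ordinary malloced memory */
                gem_close(fd, handle);
                free(ptr);
        }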
+
+static int copy(int fd, uint32_t dst, uint32_t src)
+{
+ uint32_t batch[12];
+ struct drm_i915_gem_relocation_entry reloc[2];
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_execbuffer2 exec;
+ uint32_t handle;
+ int ret, i=0;
+
+ batch[i++] = XY_SRC_COPY_BLT_CMD |
+ XY_SRC_COPY_BLT_WRITE_ALPHA |
+ XY_SRC_COPY_BLT_WRITE_RGB;
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i - 1] |= 8;
+ else
+ batch[i - 1] |= 6;
+
+ batch[i++] = (3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ WIDTH*4;
+ batch[i++] = 0; /* dst x1,y1 */
+ batch[i++] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
+ batch[i++] = 0; /* dst reloc */
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i++] = 0;
+ batch[i++] = 0; /* src x1,y1 */
+ batch[i++] = WIDTH*4;
+ batch[i++] = 0; /* src reloc */
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i++] = 0;
+ batch[i++] = MI_BATCH_BUFFER_END;
+ batch[i++] = MI_NOOP;
+
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, batch, sizeof(batch));
+
+ reloc[0].target_handle = dst;
+ reloc[0].delta = 0;
+ reloc[0].offset = 4 * sizeof(batch[0]);
+ reloc[0].presumed_offset = 0;
+ reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+
+ reloc[1].target_handle = src;
+ reloc[1].delta = 0;
+ reloc[1].offset = 7 * sizeof(batch[0]);
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ reloc[1].offset += sizeof(batch[0]);
+ reloc[1].presumed_offset = 0;
+ reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[1].write_domain = 0;
+
+ memset(&exec, 0, sizeof(exec));
+ memset(obj, 0, sizeof(obj));
+
+ obj[exec.buffer_count].handle = dst;
+ obj[exec.buffer_count].flags = LOCAL_EXEC_OBJECT_SUPPORTS_48B;
+ exec.buffer_count++;
+
+ if (src != dst) {
+ obj[exec.buffer_count].handle = src;
+ obj[exec.buffer_count].flags = LOCAL_EXEC_OBJECT_SUPPORTS_48B;
+ exec.buffer_count++;
+ }
+
+ obj[exec.buffer_count].handle = handle;
+ obj[exec.buffer_count].relocation_count = 2;
+ obj[exec.buffer_count].relocs_ptr = to_user_pointer(reloc);
+ obj[exec.buffer_count].flags = LOCAL_EXEC_OBJECT_SUPPORTS_48B;
+ exec.buffer_count++;
+ exec.buffers_ptr = to_user_pointer(obj);
+ exec.flags = HAS_BLT_RING(intel_get_drm_devid(fd)) ? I915_EXEC_BLT : 0;
+
+ ret = __gem_execbuf(fd, &exec);
+ gem_close(fd, handle);
+
+ return ret;
+}
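Editor's note on the batch built by copy() above (and blit() below), explaining the relocation offsets of 4 and 7 dwords:

        /*
         * XY_SRC_COPY_BLT dword layout (pre-gen8; gen8+ inserts an extra
         * upper-address dword after each address, hence the += sizeof(batch[0])):
         *
         *   [0] command | length      [4] dst address (reloc[0].offset = 4*4)
         *   [1] BR13: bpp/ROP/pitch   [5] src x1,y1
         *   [2] dst x1,y1             [6] src pitch
         *   [3] dst x2,y2             [7] src address (reloc[1].offset = 7*4)
         */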
+
+static int
+blit(int fd, uint32_t dst, uint32_t src, uint32_t *all_bo, int n_bo)
+{
+ uint32_t batch[12];
+ struct drm_i915_gem_relocation_entry reloc[2];
+ struct drm_i915_gem_exec_object2 *obj;
+ struct drm_i915_gem_execbuffer2 exec;
+ uint32_t handle;
+ int n, ret, i=0;
+
+ batch[i++] = XY_SRC_COPY_BLT_CMD |
+ XY_SRC_COPY_BLT_WRITE_ALPHA |
+ XY_SRC_COPY_BLT_WRITE_RGB;
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i - 1] |= 8;
+ else
+ batch[i - 1] |= 6;
+ batch[i++] = (3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ WIDTH*4;
+ batch[i++] = 0; /* dst x1,y1 */
+ batch[i++] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
+ batch[i++] = 0; /* dst reloc */
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i++] = 0;
+ batch[i++] = 0; /* src x1,y1 */
+ batch[i++] = WIDTH*4;
+ batch[i++] = 0; /* src reloc */
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ batch[i++] = 0;
+ batch[i++] = MI_BATCH_BUFFER_END;
+ batch[i++] = MI_NOOP;
+
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, batch, sizeof(batch));
+
+ reloc[0].target_handle = dst;
+ reloc[0].delta = 0;
+ reloc[0].offset = 4 * sizeof(batch[0]);
+ reloc[0].presumed_offset = 0;
+ reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+
+ reloc[1].target_handle = src;
+ reloc[1].delta = 0;
+ reloc[1].offset = 7 * sizeof(batch[0]);
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+ reloc[1].offset += sizeof(batch[0]);
+ reloc[1].presumed_offset = 0;
+ reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[1].write_domain = 0;
+
+ memset(&exec, 0, sizeof(exec));
+ obj = calloc(n_bo + 1, sizeof(*obj));
+ for (n = 0; n < n_bo; n++) {
+ obj[n].handle = all_bo[n];
+ obj[n].flags = LOCAL_EXEC_OBJECT_SUPPORTS_48B;
+ }
+ obj[n].handle = handle;
+ obj[n].flags = LOCAL_EXEC_OBJECT_SUPPORTS_48B;
+ obj[n].relocation_count = 2;
+ obj[n].relocs_ptr = to_user_pointer(reloc);
+
+ exec.buffers_ptr = to_user_pointer(obj);
+ exec.buffer_count = n_bo + 1;
+ exec.flags = HAS_BLT_RING(intel_get_drm_devid(fd)) ? I915_EXEC_BLT : 0;
+
+ ret = __gem_execbuf(fd, &exec);
+ gem_close(fd, handle);
+ free(obj);
+
+ return ret;
+}
+
+static void store_dword(int fd, uint32_t target,
+ uint32_t offset, uint32_t value)
+{
+ const int gen = intel_gen(intel_get_drm_devid(fd));
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t batch[16];
+ int i;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = ARRAY_SIZE(obj);
+ execbuf.flags = 0;
+ if (gen < 6)
+ execbuf.flags |= I915_EXEC_SECURE;
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = target;
+ obj[1].handle = gem_create(fd, 4096);
+
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.target_handle = obj[0].handle;
+ reloc.presumed_offset = 0;
+ reloc.offset = sizeof(uint32_t);
+ reloc.delta = offset;
+ reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+ obj[1].relocs_ptr = to_user_pointer(&reloc);
+ obj[1].relocation_count = 1;
+
+ i = 0;
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = offset;
+ batch[++i] = 0;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = offset;
+ reloc.offset += sizeof(uint32_t);
+ } else {
+ batch[i]--;
+ batch[++i] = offset;
+ }
+ batch[++i] = value;
+ batch[++i] = MI_BATCH_BUFFER_END;
+ gem_write(fd, obj[1].handle, 0, batch, sizeof(batch));
+ gem_execbuf(fd, &execbuf);
+ gem_close(fd, obj[1].handle);
+}
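Similarly, an editor's note on the per-generation MI_STORE_DWORD_IMM encodings emitted by store_dword() (and by store_dword_rand() further down):

        /*
         *   gen8+:  [ MI_STORE_DWORD_IMM, addr lo, addr hi, value ]
         *   gen4-7: [ MI_STORE_DWORD_IMM, 0 (MBZ), addr,    value ]  (reloc bumped one dword)
         *   gen2-3: [ MI_STORE_DWORD_IMM - 1 (one dword shorter), addr, value ]
         *
         * On gen < 6 the (1 << 22) bit requests the global GTT, hence the
         * I915_EXEC_SECURE flag on those generations.
         */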
+
+static uint32_t
+create_userptr(int fd, uint32_t val, uint32_t *ptr)
+{
+ uint32_t handle;
+ int i;
+
+ gem_userptr(fd, ptr, sizeof(linear), 0, userptr_flags, &handle);
+ igt_assert(handle != 0);
+
+ /* Fill the BO with dwords starting at val */
+ for (i = 0; i < WIDTH*HEIGHT; i++)
+ ptr[i] = val++;
+
+ return handle;
+}
+
+static void **handle_ptr_map;
+static unsigned *handle_size_map;
+static unsigned int num_handle_map;
+
+static void reset_handle_ptr(void)
+{
+ if (num_handle_map == 0)
+ return;
+
+ free(handle_ptr_map);
+ handle_ptr_map = NULL;
+
+ free(handle_size_map);
+ handle_size_map = NULL;
+
+ num_handle_map = 0;
+}
+
+static void add_handle_ptr(uint32_t handle, void *ptr, int size)
+{
+ if (handle >= num_handle_map) {
+ int max = (4096 + handle) & -4096;
+
+ handle_ptr_map = realloc(handle_ptr_map,
+ max * sizeof(void*));
+ igt_assert(handle_ptr_map);
+ memset(handle_ptr_map + num_handle_map, 0,
+ (max - num_handle_map) * sizeof(void*));
+
+ handle_size_map = realloc(handle_size_map,
+ max * sizeof(unsigned));
+ igt_assert(handle_size_map);
+ memset(handle_size_map + num_handle_map, 0,
+ (max - num_handle_map) * sizeof(unsigned));
+
+ num_handle_map = max;
+ }
+
+ handle_ptr_map[handle] = ptr;
+ handle_size_map[handle] = size;
+}
+
+static void *get_handle_ptr(uint32_t handle)
+{
+ igt_assert(handle < num_handle_map);
+ return handle_ptr_map[handle];
+}
+
+static void free_handle_ptr(uint32_t handle)
+{
+ igt_assert(handle < num_handle_map);
+ igt_assert(handle_ptr_map[handle]);
+
+ munmap(handle_ptr_map[handle], handle_size_map[handle]);
+ handle_ptr_map[handle] = NULL;
+}
+
+static uint32_t create_userptr_bo(int fd, uint64_t size)
+{
+ void *ptr;
+ uint32_t handle;
+
+ ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
+ igt_assert(ptr != MAP_FAILED);
+
+ gem_userptr(fd, (uint32_t *)ptr, size, 0, userptr_flags, &handle);
+ add_handle_ptr(handle, ptr, size);
+
+ return handle;
+}
+
+static void flink_userptr_bo(uint32_t old_handle, uint32_t new_handle)
+{
+ igt_assert(old_handle < num_handle_map);
+ igt_assert(handle_ptr_map[old_handle]);
+
+ add_handle_ptr(new_handle,
+ handle_ptr_map[old_handle],
+ handle_size_map[old_handle]);
+}
+
+static void clear(int fd, uint32_t handle, uint64_t size)
+{
+ void *ptr = get_handle_ptr(handle);
+
+ igt_assert(ptr != NULL);
+
+ memset(ptr, 0, size);
+}
+
+static void free_userptr_bo(int fd, uint32_t handle)
+{
+ gem_close(fd, handle);
+ free_handle_ptr(handle);
+}
+
+static uint32_t
+create_bo(int fd, uint32_t val)
+{
+ uint32_t handle;
+ int i;
+
+ handle = gem_create(fd, sizeof(linear));
+
+ /* Fill the BO with dwords starting at val */
+ for (i = 0; i < WIDTH*HEIGHT; i++)
+ linear[i] = val++;
+ gem_write(fd, handle, 0, linear, sizeof(linear));
+
+ return handle;
+}
+
+static void
+check_cpu(uint32_t *ptr, uint32_t val)
+{
+ int i;
+
+ for (i = 0; i < WIDTH*HEIGHT; i++) {
+ igt_assert_f(ptr[i] == val,
+ "Expected 0x%08x, found 0x%08x "
+ "at offset 0x%08x\n",
+ val, ptr[i], i * 4);
+ val++;
+ }
+}
+
+static void
+check_gpu(int fd, uint32_t handle, uint32_t val)
+{
+ gem_read(fd, handle, 0, linear, sizeof(linear));
+ check_cpu(linear, val);
+}
+
+static int has_userptr(int fd)
+{
+ uint32_t handle = 0;
+ void *ptr;
+ uint32_t oldflags;
+ int ret;
+
+ igt_assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0);
+ oldflags = userptr_flags;
+ gem_userptr_test_unsynchronized();
+ ret = __gem_userptr(fd, ptr, PAGE_SIZE, 0, userptr_flags, &handle);
+ userptr_flags = oldflags;
+ if (ret != 0) {
+ free(ptr);
+ return 0;
+ }
+
+ gem_close(fd, handle);
+ free(ptr);
+
+ return handle != 0;
+}
+
+static int test_input_checking(int fd)
+{
+ struct local_i915_gem_userptr userptr;
+ int ret;
+
+ /* Invalid flags. */
+ memset(&userptr, 0, sizeof(userptr));
+ userptr.user_ptr = 0;
+ userptr.user_size = 0;
+ userptr.flags = ~0;
+ ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr);
+ igt_assert_neq(ret, 0);
+
+ /* Too big. */
+ memset(&userptr, 0, sizeof(userptr));
+ userptr.user_ptr = 0;
+ userptr.user_size = ~0;
+ userptr.flags = 0;
+ ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr);
+ igt_assert_neq(ret, 0);
+
+ /* Both wrong. */
+ memset(&userptr, 0, sizeof(userptr));
+ userptr.user_ptr = 0;
+ userptr.user_size = ~0;
+ userptr.flags = ~0;
+ ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr);
+ igt_assert_neq(ret, 0);
+
+ /* Zero user_size. */
+ memset(&userptr, 0, sizeof(userptr));
+ userptr.user_ptr = 0;
+ userptr.user_size = 0;
+ userptr.flags = 0;
+ ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr);
+ igt_assert_neq(ret, 0);
+
+ return 0;
+}
+
+static int test_access_control(int fd)
+{
+ /* CAP_SYS_ADMIN is needed for UNSYNCHRONIZED mappings. */
+ gem_userptr_test_unsynchronized();
+ igt_require(has_userptr(fd));
+
+ igt_fork(child, 1) {
+ void *ptr;
+ int ret;
+ uint32_t handle;
+
+ igt_drop_root();
+
+ igt_assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0);
+
+ ret = __gem_userptr(fd, ptr, PAGE_SIZE, 0, userptr_flags, &handle);
+ if (ret == 0)
+ gem_close(fd, handle);
+ free(ptr);
+ igt_assert_eq(ret, -EPERM);
+ }
+
+ igt_waitchildren();
+
+ return 0;
+}
+
+static int test_invalid_null_pointer(int fd)
+{
+ uint32_t handle;
+
+ /* NULL pointer. */
+ gem_userptr(fd, NULL, PAGE_SIZE, 0, userptr_flags, &handle);
+
+ igt_assert_neq(copy(fd, handle, handle), 0); /* QQQ Precise errno? */
+ gem_close(fd, handle);
+
+ return 0;
+}
+
+static int test_invalid_gtt_mapping(int fd)
+{
+ struct drm_i915_gem_mmap_gtt arg;
+ uint32_t handle;
+ char *gtt, *map;
+
+ /* Anonymous mapping to find a hole */
+ map = mmap(NULL, sizeof(linear) + 2 * PAGE_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ igt_assert(map != MAP_FAILED);
+
+ gem_userptr(fd, map, sizeof(linear) + 2 * PAGE_SIZE, 0, userptr_flags, &handle);
+ igt_assert_eq(copy(fd, handle, handle), 0);
+ gem_close(fd, handle);
+
+ gem_userptr(fd, map, PAGE_SIZE, 0, userptr_flags, &handle);
+ igt_assert_eq(copy(fd, handle, handle), 0);
+ gem_close(fd, handle);
+
+ gem_userptr(fd, map + sizeof(linear) + PAGE_SIZE, PAGE_SIZE, 0, userptr_flags, &handle);
+ igt_assert_eq(copy(fd, handle, handle), 0);
+ gem_close(fd, handle);
+
+ /* GTT mapping */
+ memset(&arg, 0, sizeof(arg));
+ arg.handle = create_bo(fd, 0);
+ do_ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
+ gtt = mmap(map + PAGE_SIZE, sizeof(linear),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED,
+ fd, arg.offset);
+ igt_assert(gtt == map + PAGE_SIZE);
+ gem_close(fd, arg.handle);
+ igt_assert(((unsigned long)gtt & (PAGE_SIZE - 1)) == 0);
+ igt_assert((sizeof(linear) & (PAGE_SIZE - 1)) == 0);
+
+ gem_userptr(fd, gtt, sizeof(linear), 0, userptr_flags, &handle);
+ igt_assert_eq(copy(fd, handle, handle), -EFAULT);
+ gem_close(fd, handle);
+
+ gem_userptr(fd, gtt, PAGE_SIZE, 0, userptr_flags, &handle);
+ igt_assert_eq(copy(fd, handle, handle), -EFAULT);
+ gem_close(fd, handle);
+
+ gem_userptr(fd, gtt + sizeof(linear) - PAGE_SIZE, PAGE_SIZE, 0, userptr_flags, &handle);
+ igt_assert_eq(copy(fd, handle, handle), -EFAULT);
+ gem_close(fd, handle);
+
+ /* boundaries */
+ gem_userptr(fd, map, 2*PAGE_SIZE, 0, userptr_flags, &handle);
+ igt_assert_eq(copy(fd, handle, handle), -EFAULT);
+ gem_close(fd, handle);
+
+ gem_userptr(fd, map + sizeof(linear), 2*PAGE_SIZE, 0, userptr_flags, &handle);
+ igt_assert_eq(copy(fd, handle, handle), -EFAULT);
+ gem_close(fd, handle);
+
+ munmap(map, sizeof(linear) + 2*PAGE_SIZE);
+
+ return 0;
+}
+
+#define PE_GTT_MAP 0x1
+#define PE_BUSY 0x2
+static void test_process_exit(int fd, int flags)
+{
+ if (flags & PE_GTT_MAP)
+ igt_require(gem_has_llc(fd));
+
+ igt_fork(child, 1) {
+ uint32_t handle;
+
+ handle = create_userptr_bo(fd, sizeof(linear));
+
+ if (flags & PE_GTT_MAP) {
+ uint32_t *ptr = __gem_mmap__gtt(fd, handle, sizeof(linear), PROT_READ | PROT_WRITE);
+ if (ptr)
+ *ptr = 0;
+ }
+
+ if (flags & PE_BUSY)
+ igt_assert_eq(copy(fd, handle, handle), 0);
+ }
+ igt_waitchildren();
+}
+
+static void test_forked_access(int fd)
+{
+ uint32_t handle1 = 0, handle2 = 0;
+ void *ptr1 = NULL, *ptr2 = NULL;
+ int ret;
+
+ ret = posix_memalign(&ptr1, PAGE_SIZE, sizeof(linear));
+#ifdef MADV_DONTFORK
+ ret |= madvise(ptr1, sizeof(linear), MADV_DONTFORK);
+#endif
+ gem_userptr(fd, ptr1, sizeof(linear), 0, userptr_flags, &handle1);
+ igt_assert(ptr1);
+ igt_assert(handle1);
+
+ ret = posix_memalign(&ptr2, PAGE_SIZE, sizeof(linear));
+#ifdef MADV_DONTFORK
+ ret |= madvise(ptr2, sizeof(linear), MADV_DONTFORK);
+#endif
+ gem_userptr(fd, ptr2, sizeof(linear), 0, userptr_flags, &handle2);
+ igt_assert(ptr2);
+ igt_assert(handle2);
+
+ memset(ptr1, 0x1, sizeof(linear));
+ memset(ptr2, 0x2, sizeof(linear));
+
+ igt_fork(child, 1)
+ igt_assert_eq(copy(fd, handle1, handle2), 0);
+ igt_waitchildren();
+
+ gem_userptr_sync(fd, handle1);
+ gem_userptr_sync(fd, handle2);
+
+ gem_close(fd, handle1);
+ gem_close(fd, handle2);
+
+ igt_assert(memcmp(ptr1, ptr2, sizeof(linear)) == 0);
+
+#ifdef MADV_DOFORK
+ ret = madvise(ptr1, sizeof(linear), MADV_DOFORK);
+ igt_assert_eq(ret, 0);
+#endif
+ free(ptr1);
+
+#ifdef MADV_DOFORK
+ ret = madvise(ptr2, sizeof(linear), MADV_DOFORK);
+ igt_assert_eq(ret, 0);
+#endif
+ free(ptr2);
+}
+
+#define MAP_FIXED_INVALIDATE_OVERLAP (1<<0)
+#define MAP_FIXED_INVALIDATE_BUSY (1<<1)
+#define MAP_FIXED_INVALIDATE_GET_PAGES (1<<2)
+#define ALL_MAP_FIXED_INVALIDATE (MAP_FIXED_INVALIDATE_OVERLAP | \
+ MAP_FIXED_INVALIDATE_BUSY | \
+ MAP_FIXED_INVALIDATE_GET_PAGES)
+
+static int test_map_fixed_invalidate(int fd, uint32_t flags)
+{
+ const size_t ptr_size = sizeof(linear) + 2*PAGE_SIZE;
+ const int num_handles = (flags & MAP_FIXED_INVALIDATE_OVERLAP) ? 2 : 1;
+ uint32_t handle[num_handles];
+ uint32_t *ptr;
+
+ ptr = mmap(NULL, ptr_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS,
+ -1, 0);
+ igt_assert(ptr != MAP_FAILED);
+
+ for (int i = 0; i < num_handles; i++)
+ handle[i] = create_userptr(fd, 0, ptr + PAGE_SIZE/sizeof(*ptr));
+
+ for (char *fixed = (char *)ptr, *end = fixed + ptr_size;
+ fixed + 2*PAGE_SIZE <= end;
+ fixed += PAGE_SIZE) {
+ struct drm_i915_gem_mmap_gtt mmap_gtt;
+ uint32_t *map;
+
+ map = mmap(ptr, ptr_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED,
+ -1, 0);
+ igt_assert(map != MAP_FAILED);
+ igt_assert(map == ptr);
+
+ memset(&mmap_gtt, 0, sizeof(mmap_gtt));
+ mmap_gtt.handle = gem_create(fd, 2*PAGE_SIZE);
+ do_ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_gtt);
+
+ if (flags & MAP_FIXED_INVALIDATE_GET_PAGES)
+ igt_assert_eq(__gem_set_domain(fd, handle[0],
+ I915_GEM_DOMAIN_GTT,
+ I915_GEM_DOMAIN_GTT),
+ 0);
+
+ if (flags & MAP_FIXED_INVALIDATE_BUSY)
+ igt_assert_eq(copy(fd, handle[0], handle[num_handles-1]), 0);
+
+ map = mmap(fixed, 2*PAGE_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED,
+ fd, mmap_gtt.offset);
+ igt_assert(map != MAP_FAILED);
+ igt_assert(map == (uint32_t *)fixed);
+
+ gem_set_tiling(fd, mmap_gtt.handle, I915_TILING_NONE, 0);
+ *map = 0xdead;
+
+ if (flags & MAP_FIXED_INVALIDATE_GET_PAGES) {
+ igt_assert_eq(__gem_set_domain(fd, handle[0],
+ I915_GEM_DOMAIN_GTT,
+ I915_GEM_DOMAIN_GTT),
+ -EFAULT);
+
+ /* Errors are permanent, so we have to recreate */
+ gem_close(fd, handle[0]);
+ handle[0] = create_userptr(fd, 0, ptr + PAGE_SIZE/sizeof(*ptr));
+ }
+
+ gem_set_tiling(fd, mmap_gtt.handle, I915_TILING_Y, 512 * 4);
+ *(uint32_t*)map = 0xbeef;
+
+ gem_close(fd, mmap_gtt.handle);
+ }
+
+ for (int i = 0; i < num_handles; i++)
+ gem_close(fd, handle[i]);
+ munmap(ptr, ptr_size);
+
+ return 0;
+}
+
+static int test_forbidden_ops(int fd)
+{
+ struct drm_i915_gem_pread gem_pread;
+ struct drm_i915_gem_pwrite gem_pwrite;
+ uint32_t handle;
+ void *ptr;
+
+ igt_assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0);
+ gem_userptr(fd, ptr, PAGE_SIZE, 0, userptr_flags, &handle);
+
+ /* pread/pwrite are not always forbidden, but when they
+ * are they should fail with EINVAL.
+ */
+
+ memset(&gem_pread, 0, sizeof(gem_pread));
+ gem_pread.handle = handle;
+ gem_pread.offset = 0;
+ gem_pread.size = PAGE_SIZE;
+ gem_pread.data_ptr = to_user_pointer(ptr);
+ if (drmIoctl(fd, DRM_IOCTL_I915_GEM_PREAD, &gem_pread))
+ igt_assert_eq(errno, EINVAL);
+
+ memset(&gem_pwrite, 0, sizeof(gem_pwrite));
+ gem_pwrite.handle = handle;
+ gem_pwrite.offset = 0;
+ gem_pwrite.size = PAGE_SIZE;
+ gem_pwrite.data_ptr = to_user_pointer(ptr);
+ if (drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &gem_pwrite))
+ igt_assert_eq(errno, EINVAL);
+
+ gem_close(fd, handle);
+ free(ptr);
+
+ return 0;
+}
+
+static void test_relocations(int fd)
+{
+ struct drm_i915_gem_relocation_entry *reloc;
+ struct drm_i915_gem_exec_object2 obj;
+ struct drm_i915_gem_execbuffer2 exec;
+ unsigned size;
+ void *ptr;
+ int i;
+
+ size = PAGE_SIZE + ALIGN(sizeof(*reloc)*256, PAGE_SIZE);
+
+ memset(&obj, 0, sizeof(obj));
+ igt_assert(posix_memalign(&ptr, PAGE_SIZE, size) == 0);
+ gem_userptr(fd, ptr, size, 0, userptr_flags, &obj.handle);
+ if (!gem_has_llc(fd))
+ gem_set_caching(fd, obj.handle, 0);
+ *(uint32_t *)ptr = MI_BATCH_BUFFER_END;
+
+ reloc = (typeof(reloc))((char *)ptr + PAGE_SIZE);
+ obj.relocs_ptr = to_user_pointer(reloc);
+ obj.relocation_count = 256;
+
+ memset(reloc, 0, 256*sizeof(*reloc));
+ for (i = 0; i < 256; i++) {
+ reloc[i].offset = 2048 - 4*i;
+ reloc[i].target_handle = obj.handle;
+ reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ }
+
+ memset(&exec, 0, sizeof(exec));
+ exec.buffers_ptr = to_user_pointer(&obj);
+ exec.buffer_count = 1;
+ gem_execbuf(fd, &exec);
+
+ gem_sync(fd, obj.handle);
+ gem_close(fd, obj.handle);
+ free(ptr);
+}
+
+static unsigned char counter;
+
+static void (* volatile orig_sigbus)(int sig, siginfo_t *info, void *param);
+static volatile unsigned long sigbus_start;
+static volatile long sigbus_cnt = -1;
+
+static void *umap(int fd, uint32_t handle)
+{
+ void *ptr;
+
+ if (gem_has_llc(fd)) {
+ ptr = gem_mmap__gtt(fd, handle, sizeof(linear),
+ PROT_READ | PROT_WRITE);
+ } else {
+ uint32_t tmp = gem_create(fd, sizeof(linear));
+ igt_assert_eq(copy(fd, tmp, handle), 0);
+ ptr = gem_mmap__cpu(fd, tmp, 0, sizeof(linear), PROT_READ);
+ gem_close(fd, tmp);
+ }
+
+ return ptr;
+}
+
+static void
+check_bo(int fd1, uint32_t handle1, int is_userptr, int fd2, uint32_t handle2)
+{
+ unsigned char *ptr1, *ptr2;
+ unsigned long size = sizeof(linear);
+
+ ptr2 = umap(fd2, handle2);
+ if (is_userptr)
+ ptr1 = is_userptr > 0 ? get_handle_ptr(handle1) : ptr2;
+ else
+ ptr1 = umap(fd1, handle1);
+
+ igt_assert(ptr1);
+ igt_assert(ptr2);
+
+ sigbus_start = (unsigned long)ptr2;
+ igt_assert(memcmp(ptr1, ptr2, sizeof(linear)) == 0);
+
+ if (gem_has_llc(fd1)) {
+ counter++;
+ memset(ptr1, counter, size);
+ memset(ptr2, counter, size);
+ }
+
+ if (!is_userptr)
+ munmap(ptr1, sizeof(linear));
+ munmap(ptr2, sizeof(linear));
+}
+
+static int export_handle(int fd, uint32_t handle, int *outfd)
+{
+ struct drm_prime_handle args;
+ int ret;
+
+ args.handle = handle;
+ args.flags = DRM_CLOEXEC;
+ args.fd = -1;
+
+ ret = drmIoctl(fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
+ if (ret)
+ ret = errno;
+ *outfd = args.fd;
+
+ return ret;
+}
+
+static void sigbus(int sig, siginfo_t *info, void *param)
+{
+ unsigned long ptr = (unsigned long)info->si_addr;
+ void *addr;
+
+ if (ptr >= sigbus_start &&
+ ptr < sigbus_start + sizeof(linear)) {
+ /* replace mapping to allow progress */
+ munmap((void *)sigbus_start, sizeof(linear));
+ addr = mmap((void *)sigbus_start, sizeof(linear),
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ igt_assert((unsigned long)addr == sigbus_start);
+ memset(addr, counter, sizeof(linear));
+
+ sigbus_cnt++;
+ return;
+ }
+
+ if (orig_sigbus)
+ orig_sigbus(sig, info, param);
+ igt_assert(0);
+}
+
+static int test_dmabuf(void)
+{
+ int fd1, fd2;
+ uint32_t handle, handle_import;
+ int dma_buf_fd = -1;
+ int ret;
+
+ fd1 = drm_open_driver(DRIVER_INTEL);
+
+ handle = create_userptr_bo(fd1, sizeof(linear));
+ memset(get_handle_ptr(handle), counter, sizeof(linear));
+
+ ret = export_handle(fd1, handle, &dma_buf_fd);
+ if (userptr_flags & LOCAL_I915_USERPTR_UNSYNCHRONIZED && ret) {
+ igt_assert(ret == EINVAL || ret == ENODEV);
+ free_userptr_bo(fd1, handle);
+ close(fd1);
+ return 0;
+ } else {
+ igt_assert_eq(ret, 0);
+ igt_assert_lte(0, dma_buf_fd);
+ }
+
+ fd2 = drm_open_driver(DRIVER_INTEL);
+ handle_import = prime_fd_to_handle(fd2, dma_buf_fd);
+ check_bo(fd1, handle, 1, fd2, handle_import);
+
+ /* close dma_buf and check that nothing disappears. */
+ close(dma_buf_fd);
+ check_bo(fd1, handle, 1, fd2, handle_import);
+
+ /* destroy userptr object and expect SIGBUS */
+ free_userptr_bo(fd1, handle);
+ close(fd1);
+
+ if (gem_has_llc(fd2)) {
+ struct sigaction sigact, orig_sigact;
+
+ memset(&sigact, 0, sizeof(sigact));
+ sigact.sa_sigaction = sigbus;
+ sigact.sa_flags = SA_SIGINFO;
+ ret = sigaction(SIGBUS, &sigact, &orig_sigact);
+ igt_assert_eq(ret, 0);
+
+ orig_sigbus = orig_sigact.sa_sigaction;
+
+ sigbus_cnt = 0;
+ check_bo(fd2, handle_import, -1, fd2, handle_import);
+ igt_assert(sigbus_cnt > 0);
+
+ ret = sigaction(SIGBUS, &orig_sigact, NULL);
+ igt_assert_eq(ret, 0);
+ }
+
+ close(fd2);
+ reset_handle_ptr();
+
+ return 0;
+}
+
+static void store_dword_rand(int i915, unsigned int engine,
+ uint32_t target, uint64_t sz,
+ int count)
+{
+ const int gen = intel_gen(intel_get_drm_devid(i915));
+ struct drm_i915_gem_relocation_entry *reloc;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_execbuffer2 exec;
+ unsigned int batchsz;
+ uint32_t *batch;
+ int i;
+
+ batchsz = count * 16 + 4;
+ batchsz = ALIGN(batchsz, 4096);
+
+ reloc = calloc(sizeof(*reloc), count);
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = target;
+ obj[0].flags = LOCAL_EXEC_OBJECT_SUPPORTS_48B;
+ obj[1].handle = gem_create(i915, batchsz);
+ obj[1].relocation_count = count;
+ obj[1].relocs_ptr = to_user_pointer(reloc);
+
+ batch = gem_mmap__wc(i915, obj[1].handle, 0, batchsz, PROT_WRITE);
+
+ memset(&exec, 0, sizeof(exec));
+ exec.buffer_count = 2;
+ exec.buffers_ptr = to_user_pointer(obj);
+ exec.flags = engine;
+ if (gen < 6)
+ exec.flags |= I915_EXEC_SECURE;
+
+ i = 0;
+ for (int n = 0; n < count; n++) {
+ uint64_t offset;
+
+ reloc[n].target_handle = obj[0].handle;
+ reloc[n].delta = rand() % (sz / 4) * 4;
+ reloc[n].offset = (i + 1) * sizeof(uint32_t);
+ reloc[n].presumed_offset = obj[0].offset;
+ reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[n].write_domain = I915_GEM_DOMAIN_RENDER;
+
+ offset = reloc[n].presumed_offset + reloc[n].delta;
+
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = offset;
+ batch[++i] = offset >> 32;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = offset;
+ reloc[n].offset += sizeof(uint32_t);
+ } else {
+ batch[i]--;
+ batch[++i] = offset;
+ }
+ batch[++i] = rand();
+ i++;
+ }
+ batch[i] = MI_BATCH_BUFFER_END;
+ igt_assert(i * sizeof(uint32_t) < batchsz);
+ munmap(batch, batchsz);
+
+ gem_execbuf(i915, &exec);
+
+ gem_close(i915, obj[1].handle);
+ free(reloc);
+}
+
+static void test_readonly(int i915)
+{
+ unsigned char orig[SHA_DIGEST_LENGTH];
+ uint64_t aperture_size;
+ uint32_t whandle, rhandle;
+ size_t sz, total;
+ void *pages, *space;
+ int memfd;
+
+ /*
+ * A small batch of pages; small enough to cheaply check for stray
+ * writes but large enough that we don't create too many VMAs pointing
+ * back to this set from the large arena. The limit on the total number
+ * of VMAs for a process is 65,536 (at least on this kernel).
+ *
+ * We then write from the GPU through the large arena into the smaller
+ * backing storage, which we can cheaply check (using a SHA1sum) to see
+ * if those writes have landed. We then repeat the same random GPU writes
+ * through a read-only handle to confirm that this time the writes are
+ * discarded and the backing store is left unchanged.
+ */
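        /*
         * Editor's note on the sizing below: sz = 16 pages = 64 KiB and the
         * arena is capped at 2 GiB, so at most 2 GiB / 64 KiB = 32,768
         * mappings of the same pages are created, comfortably under the
         * 65,536 VMA limit mentioned above.
         */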
+ sz = 16 << 12;
+ memfd = memfd_create("pages", 0);
+ igt_require(memfd != -1);
+ igt_require(ftruncate(memfd, sz) == 0);
+
+ pages = mmap(NULL, sz, PROT_WRITE, MAP_SHARED, memfd, 0);
+ igt_assert(pages != MAP_FAILED);
+
+ igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &rhandle) == 0);
+ gem_close(i915, rhandle);
+
+ gem_userptr(i915, pages, sz, false, userptr_flags, &whandle);
+
+ /*
+ * We have only a 31-bit delta with which to generate
+ * the target address for MI_STORE_DWORD_IMM, so our maximum
+ * usable object size is only 2GiB. For now.
+ */
+ total = 2048ull << 20;
+ aperture_size = gem_aperture_size(i915) / 2;
+ if (aperture_size < total)
+ total = aperture_size;
+ total = total / sz * sz;
+ igt_info("Using a %'zuB (%'zu pages) arena onto %zu pages\n",
+ total, total >> 12, sz >> 12);
+
+ /* Create an arena all pointing to the same set of pages */
+ space = mmap(NULL, total, PROT_READ, MAP_ANON | MAP_SHARED, -1, 0);
+ igt_require(space != MAP_FAILED);
+ for (size_t offset = 0; offset < total; offset += sz) {
+ igt_assert(mmap(space + offset, sz,
+ PROT_WRITE, MAP_SHARED | MAP_FIXED,
+ memfd, 0) != MAP_FAILED);
+ *(uint32_t *)(space + offset) = offset;
+ }
+ igt_assert_eq_u32(*(uint32_t *)pages, (uint32_t)(total - sz));
+ igt_assert(mlock(space, total) == 0);
+ close(memfd);
+
+ /* Check we can create a normal userptr bo wrapping the wrapper */
+ gem_userptr(i915, space, total, false, userptr_flags, &rhandle);
+ gem_set_domain(i915, rhandle, I915_GEM_DOMAIN_CPU, 0);
+ for (size_t offset = 0; offset < total; offset += sz)
+ store_dword(i915, rhandle, offset + 4, offset / sz);
+ gem_sync(i915, rhandle);
+ igt_assert_eq_u32(*(uint32_t *)(pages + 0), (uint32_t)(total - sz));
+ igt_assert_eq_u32(*(uint32_t *)(pages + 4), (uint32_t)(total / sz - 1));
+ gem_close(i915, rhandle);
+
+ /* Now enforce read-only henceforth */
+ igt_assert(mprotect(space, total, PROT_READ) == 0);
+
+ SHA1(pages, sz, orig);
+ igt_fork(child, 1) {
+ unsigned int engine;
+
+ gem_userptr(i915, space, total, true, userptr_flags, &rhandle);
+
+ for_each_engine(i915, engine) {
+ unsigned char ref[SHA_DIGEST_LENGTH];
+ unsigned char result[SHA_DIGEST_LENGTH];
+
+ /* First tweak the backing store through the write */
+ store_dword_rand(i915, engine, whandle, sz, 1024);
+ gem_sync(i915, whandle);
+ SHA1(pages, sz, ref);
+
+ /* Check some writes did land */
+ igt_assert(memcmp(ref, orig, sizeof(ref)));
+ memcpy(orig, ref, sizeof(orig));
+
+ /* Now try the same through the read-only handle */
+ store_dword_rand(i915, engine, rhandle, total, 1024);
+ gem_sync(i915, rhandle);
+ SHA1(pages, sz, result);
+
+ /*
+ * As the writes into the read-only GPU bo should fail,
+ * the SHA1 hash of the backing store should be
+ * unaffected.
+ */
+ igt_assert(memcmp(ref, result, SHA_DIGEST_LENGTH) == 0);
+ }
+
+ gem_close(i915, rhandle);
+ }
+ igt_waitchildren();
+
+ munmap(space, total);
+ munmap(pages, sz);
+}
+
+static jmp_buf sigjmp;
+static void sigjmp_handler(int sig)
+{
+ siglongjmp(sigjmp, sig);
+}
+
+static void test_readonly_mmap(int i915)
+{
+ unsigned char original[SHA_DIGEST_LENGTH];
+ unsigned char result[SHA_DIGEST_LENGTH];
+ uint32_t handle;
+ uint32_t sz;
+ void *pages;
+ void *ptr;
+ int sig;
+
+ /*
+ * A quick check to ensure that we cannot circumvent the
+ * read-only nature of our memory by creating a GTT mmap into
+ * the pages. Imagine receiving a readonly SHM segment from
+ * another process, or a readonly file mmap; it must remain readonly
+ * on the GPU as well.
+ */
+
+ igt_require(igt_setup_clflush());
+
+ sz = 16 << 12;
+ pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+ igt_assert(pages != MAP_FAILED);
+
+ igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &handle) == 0);
+ gem_set_caching(i915, handle, 0);
+
+ memset(pages, 0xa5, sz);
+ igt_clflush_range(pages, sz);
+ SHA1(pages, sz, original);
+
+ ptr = __gem_mmap__gtt(i915, handle, sz, PROT_WRITE);
+ igt_assert(ptr == NULL);
+
+ ptr = gem_mmap__gtt(i915, handle, sz, PROT_READ);
+ gem_close(i915, handle);
+
+ /* Check that a write into the GTT readonly map fails */
+ if (!(sig = sigsetjmp(sigjmp, 1))) {
+ signal(SIGBUS, sigjmp_handler);
+ signal(SIGSEGV, sigjmp_handler);
+ memset(ptr, 0x5a, sz);
+ igt_assert(0);
+ }
+ igt_assert_eq(sig, SIGSEGV);
+
+ /* Check that we disallow removing the readonly protection */
+ igt_assert(mprotect(ptr, sz, PROT_WRITE));
+ if (!(sig = sigsetjmp(sigjmp, 1))) {
+ signal(SIGBUS, sigjmp_handler);
+ signal(SIGSEGV, sigjmp_handler);
+ memset(ptr, 0x5a, sz);
+ igt_assert(0);
+ }
+ igt_assert_eq(sig, SIGSEGV);
+
+ /* A single read from the GTT pointer to prove that works */
+ igt_assert_eq_u32(*(uint8_t *)ptr, 0xa5);
+ munmap(ptr, sz);
+
+ /* Double check that the kernel did indeed not let any writes through */
+ igt_clflush_range(pages, sz);
+ SHA1(pages, sz, result);
+ igt_assert(!memcmp(original, result, sizeof(original)));
+
+ munmap(pages, sz);
+}
+
+static void test_readonly_pwrite(int i915)
+{
+ unsigned char original[SHA_DIGEST_LENGTH];
+ unsigned char result[SHA_DIGEST_LENGTH];
+ uint32_t handle;
+ uint32_t sz;
+ void *pages;
+
+ /*
+ * As with GTT mmappings, we must not be allowed to
+ * circumvent read-only protection on a piece of memory via the
+ * pwrite ioctl.
+ */
+
+ igt_require(igt_setup_clflush());
+
+ sz = 16 << 12;
+ pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+ igt_assert(pages != MAP_FAILED);
+
+ igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &handle) == 0);
+ memset(pages, 0xa5, sz);
+ SHA1(pages, sz, original);
+
+ for (int page = 0; page < 16; page++) {
+ char data[4096];
+
+ memset(data, page, sizeof(data));
+ igt_assert_eq(__gem_write(i915, handle, page << 12, data, sizeof(data)), -EINVAL);
+ }
+
+ gem_close(i915, handle);
+
+ SHA1(pages, sz, result);
+ igt_assert(!memcmp(original, result, sizeof(original)));
+
+ munmap(pages, sz);
+}
+
+static int test_usage_restrictions(int fd)
+{
+ void *ptr;
+ int ret;
+ uint32_t handle;
+
+ igt_assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE * 2) == 0);
+
+ /* Address not aligned. */
+ ret = __gem_userptr(fd, (char *)ptr + 1, PAGE_SIZE, 0, userptr_flags, &handle);
+ igt_assert_neq(ret, 0);
+
+ /* Size not rounded to page size. */
+ ret = __gem_userptr(fd, ptr, PAGE_SIZE - 1, 0, userptr_flags, &handle);
+ igt_assert_neq(ret, 0);
+
+ /* Both wrong. */
+ ret = __gem_userptr(fd, (char *)ptr + 1, PAGE_SIZE - 1, 0, userptr_flags, &handle);
+ igt_assert_neq(ret, 0);
+
+ free(ptr);
+
+ return 0;
+}
+
+static int test_create_destroy(int fd, int time)
+{
+ struct timespec start, now;
+ uint32_t handle;
+ void *ptr;
+ int n;
+
+ igt_fork_signal_helper();
+
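+ /*
+ * Create and destroy userptr handles in a tight loop for the given
+ * number of seconds, with the signal helper running to exercise the
+ * interrupted-ioctl paths.
+ */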
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ do {
+ for (n = 0; n < 1000; n++) {
+ igt_assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0);
+
+ gem_userptr(fd, ptr, PAGE_SIZE, 0, userptr_flags, &handle);
+
+ gem_close(fd, handle);
+ free(ptr);
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ now.tv_sec -= time;
+ } while (now.tv_sec < start.tv_sec ||
+ (now.tv_sec == start.tv_sec && now.tv_nsec < start.tv_nsec));
+
+ igt_stop_signal_helper();
+
+ return 0;
+}
+
+static int test_coherency(int fd, int count)
+{
+ uint32_t *memory;
+ uint32_t *cpu, *cpu_val;
+ uint32_t *gpu, *gpu_val;
+ uint32_t start = 0;
+ int i, ret;
+
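+ /*
+ * Blit back and forth between ordinary GEM objects and userptr-backed
+ * objects, verifying after each pass that the CPU and GPU views of the
+ * data still agree.
+ */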
+ igt_info("Using 2x%d 1MiB buffers\n", count);
+ intel_require_memory(2*count, sizeof(linear), CHECK_RAM);
+
+ ret = posix_memalign((void **)&memory, PAGE_SIZE, count*sizeof(linear));
+ igt_assert(ret == 0 && memory);
+
+ gpu = malloc(sizeof(uint32_t)*count*4);
+ gpu_val = gpu + count;
+ cpu = gpu_val + count;
+ cpu_val = cpu + count;
+
+ for (i = 0; i < count; i++) {
+ gpu[i] = create_bo(fd, start);
+ gpu_val[i] = start;
+ start += WIDTH*HEIGHT;
+ }
+
+ for (i = 0; i < count; i++) {
+ cpu[i] = create_userptr(fd, start, memory+i*WIDTH*HEIGHT);
+ cpu_val[i] = start;
+ start += WIDTH*HEIGHT;
+ }
+
+ igt_info("Verifying initialisation...\n");
+ for (i = 0; i < count; i++) {
+ check_gpu(fd, gpu[i], gpu_val[i]);
+ check_cpu(memory+i*WIDTH*HEIGHT, cpu_val[i]);
+ }
+
+ igt_info("Cyclic blits cpu->gpu, forward...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = i % count;
+ int dst = (i + 1) % count;
+
+ igt_assert_eq(copy(fd, gpu[dst], cpu[src]), 0);
+ gpu_val[dst] = cpu_val[src];
+ }
+ for (i = 0; i < count; i++)
+ check_gpu(fd, gpu[i], gpu_val[i]);
+
+ igt_info("Cyclic blits gpu->cpu, backward...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = (i + 1) % count;
+ int dst = i % count;
+
+ igt_assert_eq(copy(fd, cpu[dst], gpu[src]), 0);
+ cpu_val[dst] = gpu_val[src];
+ }
+ for (i = 0; i < count; i++) {
+ gem_userptr_sync(fd, cpu[i]);
+ check_cpu(memory+i*WIDTH*HEIGHT, cpu_val[i]);
+ }
+
+ igt_info("Random blits...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = random() % count;
+ int dst = random() % count;
+
+ if (random() & 1) {
+ igt_assert_eq(copy(fd, gpu[dst], cpu[src]), 0);
+ gpu_val[dst] = cpu_val[src];
+ } else {
+ igt_assert_eq(copy(fd, cpu[dst], gpu[src]), 0);
+ cpu_val[dst] = gpu_val[src];
+ }
+ }
+ for (i = 0; i < count; i++) {
+ check_gpu(fd, gpu[i], gpu_val[i]);
+ gem_close(fd, gpu[i]);
+
+ gem_userptr_sync(fd, cpu[i]);
+ check_cpu(memory+i*WIDTH*HEIGHT, cpu_val[i]);
+ gem_close(fd, cpu[i]);
+ }
+
+ free(gpu);
+ free(memory);
+
+ return 0;
+}
+
+static struct igt_eviction_test_ops fault_ops = {
+ .create = create_userptr_bo,
+ .flink = flink_userptr_bo,
+ .close = free_userptr_bo,
+ .copy = blit,
+ .clear = clear,
+};
+
+static int can_swap(void)
+{
+ unsigned long as, ram;
+
+ /* Cannot swap if not enough address space */
+
+ /* FIXME: Improve check criteria. */
+ if (sizeof(void*) < 8)
+ as = 3 * 1024;
+ else
+ as = 256 * 1024; /* Just a big number */
+
+ ram = intel_get_total_ram_mb();
+
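+ /*
+ * Swapping can only be forced when the usable address space (minus a
+ * small reserve) exceeds physical RAM (minus a reserve); otherwise we
+ * exhaust the address space before swap is ever touched.
+ */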
+ if ((as - 128) < (ram - 256))
+ return 0;
+
+ return 1;
+}
+
+static void test_forking_evictions(int fd, int size, int count,
+ unsigned flags)
+{
+ int trash_count;
+ int num_threads;
+
+ trash_count = intel_get_total_ram_mb() * 11 / 10;
+ /* Exploit the fact that the test will spawn a number of
+ * child processes, meaning swapping will be triggered
+ * system-wide even if one process on its own can't do it.
+ */
+ num_threads = min(sysconf(_SC_NPROCESSORS_ONLN) * 4, 12);
+ trash_count /= num_threads;
+ if (count > trash_count)
+ count = trash_count;
+
+ forking_evictions(fd, &fault_ops, size, count, trash_count, flags);
+ reset_handle_ptr();
+}
+
+static void test_mlocked_evictions(int fd, int size, int count)
+{
+ count = min(256, count/2);
+ mlocked_evictions(fd, &fault_ops, size, count);
+ reset_handle_ptr();
+}
+
+static void test_swapping_evictions(int fd, int size, int count)
+{
+ int trash_count;
+
+ igt_skip_on_f(!can_swap(),
+ "Not enough process address space for swapping tests.\n");
+
+ trash_count = intel_get_total_ram_mb() * 11 / 10;
+
+ swapping_evictions(fd, &fault_ops, size, count, trash_count);
+ reset_handle_ptr();
+}
+
+static void test_minor_evictions(int fd, int size, int count)
+{
+ minor_evictions(fd, &fault_ops, size, count);
+ reset_handle_ptr();
+}
+
+static void test_major_evictions(int fd, int size, int count)
+{
+ major_evictions(fd, &fault_ops, size, count);
+ reset_handle_ptr();
+}
+
+static void test_overlap(int fd, int expected)
+{
+ char *ptr;
+ int ret;
+ uint32_t handle, handle2;
+
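+ /*
+ * Overlapping userptr ranges are only an error in synchronized mode;
+ * "expected" carries the errno we accept alongside success (0 or EINVAL).
+ */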
+ igt_assert(posix_memalign((void *)&ptr, PAGE_SIZE, PAGE_SIZE * 3) == 0);
+
+ gem_userptr(fd, ptr + PAGE_SIZE, PAGE_SIZE, 0, userptr_flags, &handle);
+
+ /* before, no overlap */
+ ret = __gem_userptr(fd, ptr, PAGE_SIZE, 0, userptr_flags, &handle2);
+ if (ret == 0)
+ gem_close(fd, handle2);
+ igt_assert_eq(ret, 0);
+
+ /* after, no overlap */
+ ret = __gem_userptr(fd, ptr + PAGE_SIZE * 2, PAGE_SIZE, 0, userptr_flags, &handle2);
+ if (ret == 0)
+ gem_close(fd, handle2);
+ igt_assert_eq(ret, 0);
+
+ /* exactly overlapping */
+ ret = __gem_userptr(fd, ptr + PAGE_SIZE, PAGE_SIZE, 0, userptr_flags, &handle2);
+ if (ret == 0)
+ gem_close(fd, handle2);
+ igt_assert(ret == 0 || ret == expected);
+
+ /* start overlaps */
+ ret = __gem_userptr(fd, ptr, PAGE_SIZE * 2, 0, userptr_flags, &handle2);
+ if (ret == 0)
+ gem_close(fd, handle2);
+ igt_assert(ret == 0 || ret == expected);
+
+ /* end overlaps */
+ ret = __gem_userptr(fd, ptr + PAGE_SIZE, PAGE_SIZE * 2, 0, userptr_flags, &handle2);
+ if (ret == 0)
+ gem_close(fd, handle2);
+ igt_assert(ret == 0 || ret == expected);
+
+ /* subsumes */
+ ret = __gem_userptr(fd, ptr, PAGE_SIZE * 3, 0, userptr_flags, &handle2);
+ if (ret == 0)
+ gem_close(fd, handle2);
+ igt_assert(ret == 0 || ret == expected);
+
+ gem_close(fd, handle);
+ free(ptr);
+}
+
+static void test_unmap(int fd, int expected)
+{
+ char *ptr, *bo_ptr;
+ const unsigned int num_obj = 3;
+ unsigned int i;
+ uint32_t bo[num_obj + 1];
+ size_t map_size = sizeof(linear) * num_obj + (PAGE_SIZE - 1);
+ int ret;
+
+ ptr = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ igt_assert(ptr != MAP_FAILED);
+
+ bo_ptr = (char *)ALIGN((unsigned long)ptr, PAGE_SIZE);
+
+ for (i = 0; i < num_obj; i++, bo_ptr += sizeof(linear)) {
+ gem_userptr(fd, bo_ptr, sizeof(linear), 0, userptr_flags, &bo[i]);
+ }
+
+ bo[num_obj] = create_bo(fd, 0);
+
+ for (i = 0; i < num_obj; i++)
+ igt_assert_eq(copy(fd, bo[num_obj], bo[i]), 0);
+
+ ret = munmap(ptr, map_size);
+ igt_assert_eq(ret, 0);
+
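+ /* With the backing store unmapped, further GPU access must now fail */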
+ for (i = 0; i < num_obj; i++)
+ igt_assert_eq(copy(fd, bo[num_obj], bo[i]), -expected);
+
+ for (i = 0; i < (num_obj + 1); i++)
+ gem_close(fd, bo[i]);
+}
+
+static void test_unmap_after_close(int fd)
+{
+ char *ptr, *bo_ptr;
+ const unsigned int num_obj = 3;
+ unsigned int i;
+ uint32_t bo[num_obj + 1];
+ size_t map_size = sizeof(linear) * num_obj + (PAGE_SIZE - 1);
+ int ret;
+
+ ptr = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ igt_assert(ptr != MAP_FAILED);
+
+ bo_ptr = (char *)ALIGN((unsigned long)ptr, PAGE_SIZE);
+
+ for (i = 0; i < num_obj; i++, bo_ptr += sizeof(linear)) {
+ gem_userptr(fd, bo_ptr, sizeof(linear), 0, userptr_flags, &bo[i]);
+ }
+
+ bo[num_obj] = create_bo(fd, 0);
+
+ for (i = 0; i < num_obj; i++)
+ igt_assert_eq(copy(fd, bo[num_obj], bo[i]), 0);
+
+ for (i = 0; i < (num_obj + 1); i++)
+ gem_close(fd, bo[i]);
+
+ ret = munmap(ptr, map_size);
+ igt_assert_eq(ret, 0);
+}
+
+static void test_unmap_cycles(int fd, int expected)
+{
+ int i;
+
+ for (i = 0; i < 1000; i++)
+ test_unmap(fd, expected);
+}
+
+#define MM_STRESS_LOOPS 100000
+
+struct stress_thread_data {
+ unsigned int stop;
+ int exit_code;
+};
+
+static void *mm_stress_thread(void *data)
+{
+ struct stress_thread_data *stdata = (struct stress_thread_data *)data;
+ void *ptr;
+ int ret;
+
+ while (!stdata->stop) {
+ ptr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (ptr == MAP_FAILED) {
+ stdata->exit_code = -EFAULT;
+ break;
+ }
+ ret = munmap(ptr, PAGE_SIZE);
+ if (ret) {
+ stdata->exit_code = errno;
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+static void test_stress_mm(int fd)
+{
+ int ret;
+ pthread_t t;
+ unsigned int loops = MM_STRESS_LOOPS;
+ uint32_t handle;
+ void *ptr;
+ struct stress_thread_data stdata;
+
+ memset(&stdata, 0, sizeof(stdata));
+
+ igt_assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0);
+
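+ /*
+ * Churn the process address space from a second thread (mmap/munmap of
+ * a scratch page) while the main thread creates and destroys userptr
+ * handles, racing object setup/teardown against mm changes.
+ */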
+ ret = pthread_create(&t, NULL, mm_stress_thread, &stdata);
+ igt_assert_eq(ret, 0);
+
+ while (loops--) {
+ gem_userptr(fd, ptr, PAGE_SIZE, 0, userptr_flags, &handle);
+
+ gem_close(fd, handle);
+ }
+
+ free(ptr);
+
+ stdata.stop = 1;
+ ret = pthread_join(t, NULL);
+ igt_assert_eq(ret, 0);
+
+ igt_assert_eq(stdata.exit_code, 0);
+}
+
+struct userptr_close_thread_data {
+ int fd;
+ void *ptr;
+ bool overlap;
+ bool stop;
+ pthread_mutex_t mutex;
+};
+
+static void *mm_userptr_close_thread(void *data)
+{
+ struct userptr_close_thread_data *t = (struct userptr_close_thread_data *)data;
+ int num_handles = t->overlap ? 2 : 1;
+
+ uint32_t handle[num_handles];
+
+ /* Be pedantic and enforce the required memory barriers */
+ pthread_mutex_lock(&t->mutex);
+ while (!t->stop) {
+ pthread_mutex_unlock(&t->mutex);
+ for (int i = 0; i < num_handles; i++)
+ gem_userptr(t->fd, t->ptr, PAGE_SIZE, 0, userptr_flags, &handle[i]);
+ for (int i = 0; i < num_handles; i++)
+ gem_close(t->fd, handle[i]);
+ pthread_mutex_lock(&t->mutex);
+ }
+ pthread_mutex_unlock(&t->mutex);
+
+ return NULL;
+}
+
+static void test_invalidate_close_race(int fd, bool overlap)
+{
+ pthread_t t;
+ unsigned int loops = MM_STRESS_LOOPS;
+ struct userptr_close_thread_data t_data;
+
+ memset(&t_data, 0, sizeof(t_data));
+ t_data.fd = fd;
+ t_data.overlap = overlap;
+ igt_assert(posix_memalign(&t_data.ptr, PAGE_SIZE, PAGE_SIZE) == 0);
+ pthread_mutex_init(&t_data.mutex, NULL);
+
+ igt_assert(pthread_create(&t, NULL, mm_userptr_close_thread, &t_data) == 0);
+
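+ /*
+ * Flipping the page protections forces invalidations of the userptr
+ * range, racing against the worker thread that keeps creating and
+ * closing handles on the same page.
+ */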
+ while (loops--) {
+ mprotect(t_data.ptr, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC);
+ mprotect(t_data.ptr, PAGE_SIZE, PROT_READ | PROT_WRITE);
+ }
+
+ pthread_mutex_lock(&t_data.mutex);
+ t_data.stop = 1;
+ pthread_mutex_unlock(&t_data.mutex);
+
+ pthread_join(t, NULL);
+
+ pthread_mutex_destroy(&t_data.mutex);
+ free(t_data.ptr);
+}
+
+uint64_t total_ram;
+uint64_t aperture_size;
+int fd, count;
+
+
+int main(int argc, char **argv)
+{
+ int size = sizeof(linear);
+
+ igt_subtest_init(argc, argv);
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_assert(fd >= 0);
+ igt_require_gem(fd);
+
+ size = sizeof(linear);
+
+ aperture_size = gem_aperture_size(fd);
+ igt_info("Aperture size is %lu MiB\n", (long)(aperture_size / (1024*1024)));
+
+ if (argc > 1)
+ count = atoi(argv[1]);
+ if (count == 0)
+ count = 2 * aperture_size / (1024*1024) / 3;
+
+ total_ram = intel_get_total_ram_mb();
+ igt_info("Total RAM is %'llu MiB\n", (long long)total_ram);
+
+ if (count > total_ram * 3 / 4) {
+ count = intel_get_total_ram_mb() * 3 / 4;
+ igt_info("Not enough RAM to run test, reducing buffer count.\n");
+ }
+ }
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(has_userptr(fd));
+ }
+
+ igt_subtest("input-checking")
+ test_input_checking(fd);
+
+ igt_subtest("usage-restrictions")
+ test_usage_restrictions(fd);
+
+ igt_subtest("invalid-null-pointer")
+ test_invalid_null_pointer(fd);
+
+ igt_subtest("invalid-gtt-mapping")
+ test_invalid_gtt_mapping(fd);
+
+ igt_subtest("forked-access")
+ test_forked_access(fd);
+
+ igt_subtest("forbidden-operations")
+ test_forbidden_ops(fd);
+
+ igt_subtest("relocations")
+ test_relocations(fd);
+ }
+
+ igt_subtest_group {
+ gem_userptr_test_unsynchronized();
+
+ igt_fixture {
+ igt_require(has_userptr(fd));
+ }
+
+ igt_subtest("create-destroy-unsync")
+ test_create_destroy(fd, 5);
+
+ igt_subtest("unsync-overlap")
+ test_overlap(fd, 0);
+
+ igt_subtest("unsync-unmap")
+ test_unmap(fd, 0);
+
+ igt_subtest("unsync-unmap-cycles")
+ test_unmap_cycles(fd, 0);
+
+ igt_subtest("unsync-unmap-after-close")
+ test_unmap_after_close(fd);
+
+ igt_subtest("coherency-unsync")
+ test_coherency(fd, count);
+
+ igt_subtest("dmabuf-unsync")
+ test_dmabuf();
+
+ igt_subtest("readonly-unsync")
+ test_readonly(fd);
+
+ igt_subtest("readonly-mmap-unsync")
+ test_readonly_mmap(fd);
+
+ igt_subtest("readonly-pwrite-unsync")
+ test_readonly_pwrite(fd);
+
+ for (unsigned flags = 0; flags < ALL_FORKING_EVICTIONS + 1; flags++) {
+ igt_subtest_f("forked-unsync%s%s%s-%s",
+ flags & FORKING_EVICTIONS_SWAPPING ? "-swapping" : "",
+ flags & FORKING_EVICTIONS_DUP_DRMFD ? "-multifd" : "",
+ flags & FORKING_EVICTIONS_MEMORY_PRESSURE ?
+ "-mempressure" : "",
+ flags & FORKING_EVICTIONS_INTERRUPTIBLE ?
+ "interruptible" : "normal") {
+ test_forking_evictions(fd, size, count, flags);
+ }
+ }
+
+ igt_subtest("mlocked-unsync-normal")
+ test_mlocked_evictions(fd, size, count);
+
+ igt_subtest("swapping-unsync-normal")
+ test_swapping_evictions(fd, size, count);
+
+ igt_subtest("minor-unsync-normal")
+ test_minor_evictions(fd, size, count);
+
+ igt_subtest("major-unsync-normal") {
+ size = 200 * 1024 * 1024;
+ count = (gem_aperture_size(fd) / size) + 2;
+ test_major_evictions(fd, size, count);
+ }
+
+ igt_fixture {
+ size = sizeof(linear);
+ count = 2 * gem_aperture_size(fd) / (1024*1024) / 3;
+ if (count > total_ram * 3 / 4)
+ count = intel_get_total_ram_mb() * 3 / 4;
+ }
+
+ igt_fork_signal_helper();
+
+ igt_subtest("mlocked-unsync-interruptible")
+ test_mlocked_evictions(fd, size, count);
+
+ igt_subtest("swapping-unsync-interruptible")
+ test_swapping_evictions(fd, size, count);
+
+ igt_subtest("minor-unsync-interruptible")
+ test_minor_evictions(fd, size, count);
+
+ igt_subtest("major-unsync-interruptible") {
+ size = 200 * 1024 * 1024;
+ count = (gem_aperture_size(fd) / size) + 2;
+ test_major_evictions(fd, size, count);
+ }
+
+ igt_stop_signal_helper();
+ }
+
+ igt_subtest_group {
+ gem_userptr_test_synchronized();
+
+ igt_fixture {
+ igt_require(has_userptr(fd));
+ size = sizeof(linear);
+ count = 2 * gem_aperture_size(fd) / (1024*1024) / 3;
+ if (count > total_ram * 3 / 4)
+ count = intel_get_total_ram_mb() * 3 / 4;
+ }
+
+ igt_subtest("process-exit")
+ test_process_exit(fd, 0);
+
+ igt_subtest("process-exit-gtt")
+ test_process_exit(fd, PE_GTT_MAP);
+
+ igt_subtest("process-exit-busy")
+ test_process_exit(fd, PE_BUSY);
+
+ igt_subtest("process-exit-gtt-busy")
+ test_process_exit(fd, PE_GTT_MAP | PE_BUSY);
+
+ igt_subtest("create-destroy-sync")
+ test_create_destroy(fd, 5);
+
+ igt_subtest("sync-overlap")
+ test_overlap(fd, EINVAL);
+
+ igt_subtest("sync-unmap")
+ test_unmap(fd, EFAULT);
+
+ igt_subtest("sync-unmap-cycles")
+ test_unmap_cycles(fd, EFAULT);
+
+ igt_subtest("sync-unmap-after-close")
+ test_unmap_after_close(fd);
+
+ igt_subtest("stress-mm")
+ test_stress_mm(fd);
+
+ igt_subtest("stress-mm-invalidate-close")
+ test_invalidate_close_race(fd, false);
+
+ igt_subtest("stress-mm-invalidate-close-overlap")
+ test_invalidate_close_race(fd, true);
+
+ for (unsigned flags = 0; flags < ALL_MAP_FIXED_INVALIDATE + 1; flags++) {
+ igt_subtest_f("map-fixed-invalidate%s%s%s",
+ flags & MAP_FIXED_INVALIDATE_OVERLAP ? "-overlap" : "",
+ flags & MAP_FIXED_INVALIDATE_BUSY ? "-busy" : "",
+ flags & MAP_FIXED_INVALIDATE_GET_PAGES ? "-gup" : "") {
+ test_map_fixed_invalidate(fd, flags);
+ }
+ }
+
+ igt_subtest("coherency-sync")
+ test_coherency(fd, count);
+
+ igt_subtest("dmabuf-sync")
+ test_dmabuf();
+
+ for (unsigned flags = 0; flags < ALL_FORKING_EVICTIONS + 1; flags++) {
+ igt_subtest_f("forked-sync%s%s%s-%s",
+ flags & FORKING_EVICTIONS_SWAPPING ? "-swapping" : "",
+ flags & FORKING_EVICTIONS_DUP_DRMFD ? "-multifd" : "",
+ flags & FORKING_EVICTIONS_MEMORY_PRESSURE ?
+ "-mempressure" : "",
+ flags & FORKING_EVICTIONS_INTERRUPTIBLE ?
+ "interruptible" : "normal") {
+ test_forking_evictions(fd, size, count, flags);
+ }
+ }
+
+ igt_subtest("mlocked-normal-sync")
+ test_mlocked_evictions(fd, size, count);
+
+ igt_subtest("swapping-normal-sync")
+ test_swapping_evictions(fd, size, count);
+
+ igt_subtest("minor-normal-sync")
+ test_minor_evictions(fd, size, count);
+
+ igt_subtest("major-normal-sync") {
+ size = 200 * 1024 * 1024;
+ count = (gem_aperture_size(fd) / size) + 2;
+ test_major_evictions(fd, size, count);
+ }
+
+ igt_fixture {
+ size = 1024 * 1024;
+ count = 2 * gem_aperture_size(fd) / (1024*1024) / 3;
+ if (count > total_ram * 3 / 4)
+ count = intel_get_total_ram_mb() * 3 / 4;
+ }
+
+ igt_fork_signal_helper();
+
+ igt_subtest("mlocked-sync-interruptible")
+ test_mlocked_evictions(fd, size, count);
+
+ igt_subtest("swapping-sync-interruptible")
+ test_swapping_evictions(fd, size, count);
+
+ igt_subtest("minor-sync-interruptible")
+ test_minor_evictions(fd, size, count);
+
+ igt_subtest("major-sync-interruptible") {
+ size = 200 * 1024 * 1024;
+ count = (gem_aperture_size(fd) / size) + 2;
+ test_major_evictions(fd, size, count);
+ }
+
+ igt_stop_signal_helper();
+ }
+
+
+ igt_subtest("access-control")
+ test_access_control(fd);
+
+ igt_exit();
+}
diff --git a/tests/i915/gem_wait.c b/tests/i915/gem_wait.c
new file mode 100644
index 00000000..7914c936
--- /dev/null
+++ b/tests/i915/gem_wait.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+#include "igt.h"
+#include "igt_vgem.h"
+
+static int __gem_wait(int fd, struct drm_i915_gem_wait *w)
+{
+ int err;
+
+ err = 0;
+ if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_WAIT, w))
+ err = -errno;
+
+ return err;
+}
+
+static void invalid_flags(int fd)
+{
+ struct drm_i915_gem_wait wait;
+
+ memset(&wait, 0, sizeof(wait));
+ wait.bo_handle = gem_create(fd, 4096);
+ wait.timeout_ns = 1;
+ /* NOTE: This test intentionally tests for just the next available flag.
+ * Don't "fix" this testcase without the ABI testcases for new flags
+ * first. */
+ wait.flags = 1;
+
+ igt_assert_eq(__gem_wait(fd, &wait), -EINVAL);
+
+ gem_close(fd, wait.bo_handle);
+}
+
+static void invalid_buf(int fd)
+{
+ struct drm_i915_gem_wait wait;
+
+ memset(&wait, 0, sizeof(wait));
+ igt_assert_eq(__gem_wait(fd, &wait), -ENOENT);
+}
+
+#define BUSY 1
+#define HANG 2
+#define AWAIT 4
+#define WRITE 8
+
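+/*
+ * Submit a spinning batch (optionally plugged behind a cork for the
+ * WRITE/AWAIT variants) and check GEM_WAIT semantics: a zero or short
+ * timeout reports -ETIME while the request is still busy, and the wait
+ * completes once the spinner expires (or, for HANG, once the hang
+ * detector has reset the GPU).
+ */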
+static void basic(int fd, unsigned engine, unsigned flags)
+{
+ IGT_CORK_HANDLE(cork);
+ uint32_t plug =
+ flags & (WRITE | AWAIT) ? igt_cork_plug(&cork, fd) : 0;
+ igt_spin_t *spin = igt_spin_batch_new(fd,
+ .engine = engine,
+ .dependency = plug);
+ struct drm_i915_gem_wait wait = {
+ flags & WRITE ? plug : spin->handle
+ };
+
+ igt_assert_eq(__gem_wait(fd, &wait), -ETIME);
+
+ if (flags & BUSY) {
+ struct timespec tv = {};
+ int timeout;
+
+ timeout = 120;
+ if ((flags & HANG) == 0) {
+ igt_spin_batch_set_timeout(spin, NSEC_PER_SEC/2);
+ timeout = 1;
+ }
+
+ if (flags & (WRITE | AWAIT))
+ igt_cork_unplug(&cork);
+
+ igt_assert_eq(__gem_wait(fd, &wait), -ETIME);
+
+ while (__gem_wait(fd, &wait) == -ETIME)
+ igt_assert(igt_seconds_elapsed(&tv) < timeout);
+ } else {
+ wait.timeout_ns = NSEC_PER_SEC / 2; /* 0.5s */
+ igt_assert_eq(__gem_wait(fd, &wait), -ETIME);
+ igt_assert_eq_s64(wait.timeout_ns, 0);
+
+ if (flags & (WRITE | AWAIT))
+ igt_cork_unplug(&cork);
+
+ wait.timeout_ns = 0;
+ igt_assert_eq(__gem_wait(fd, &wait), -ETIME);
+
+ if ((flags & HANG) == 0) {
+ igt_spin_batch_set_timeout(spin, NSEC_PER_SEC/2);
+ wait.timeout_ns = NSEC_PER_SEC; /* 1.0s */
+ igt_assert_eq(__gem_wait(fd, &wait), 0);
+ igt_assert(wait.timeout_ns >= 0);
+ } else {
+ wait.timeout_ns = -1;
+ igt_assert_eq(__gem_wait(fd, &wait), 0);
+ igt_assert(wait.timeout_ns == -1);
+ }
+
+ wait.timeout_ns = 0;
+ igt_assert_eq(__gem_wait(fd, &wait), 0);
+ igt_assert(wait.timeout_ns == 0);
+ }
+
+ if (plug)
+ gem_close(fd, plug);
+ igt_spin_batch_free(fd, spin);
+}
+
+igt_main
+{
+ const struct intel_execution_engine *e;
+ int fd = -1;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver_master(DRIVER_INTEL);
+ igt_require_gem(fd);
+ }
+
+ igt_subtest("invalid-flags")
+ invalid_flags(fd);
+
+ igt_subtest("invalid-buf")
+ invalid_buf(fd);
+
+ igt_subtest_group {
+ igt_fixture {
+ igt_fork_hang_detector(fd);
+ igt_fork_signal_helper();
+ }
+
+ igt_subtest("basic-busy-all") {
+ gem_quiescent_gpu(fd);
+ basic(fd, ALL_ENGINES, BUSY);
+ }
+ igt_subtest("basic-wait-all") {
+ gem_quiescent_gpu(fd);
+ basic(fd, ALL_ENGINES, 0);
+ }
+ igt_subtest("basic-await-all") {
+ gem_quiescent_gpu(fd);
+ basic(fd, ALL_ENGINES, AWAIT);
+ }
+ igt_subtest("basic-busy-write-all") {
+ gem_quiescent_gpu(fd);
+ basic(fd, ALL_ENGINES, BUSY | WRITE);
+ }
+ igt_subtest("basic-wait-write-all") {
+ gem_quiescent_gpu(fd);
+ basic(fd, ALL_ENGINES, WRITE);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_group {
+ igt_subtest_f("busy-%s", e->name) {
+ gem_quiescent_gpu(fd);
+ basic(fd, e->exec_id | e->flags, BUSY);
+ }
+ igt_subtest_f("wait-%s", e->name) {
+ gem_quiescent_gpu(fd);
+ basic(fd, e->exec_id | e->flags, 0);
+ }
+ igt_subtest_f("await-%s", e->name) {
+ gem_quiescent_gpu(fd);
+ basic(fd, e->exec_id | e->flags, AWAIT);
+ }
+ igt_subtest_f("write-busy-%s", e->name) {
+ gem_quiescent_gpu(fd);
+ basic(fd, e->exec_id | e->flags, BUSY | WRITE);
+ }
+ igt_subtest_f("write-wait-%s", e->name) {
+ gem_quiescent_gpu(fd);
+ basic(fd, e->exec_id | e->flags, WRITE);
+ }
+ }
+ }
+
+ igt_fixture {
+ igt_stop_signal_helper();
+ igt_stop_hang_detector();
+ }
+ }
+
+ igt_subtest_group {
+ igt_hang_t hang;
+
+ igt_fixture {
+ hang = igt_allow_hang(fd, 0, 0);
+ igt_fork_signal_helper();
+ }
+
+ igt_subtest("hang-busy-all") {
+ gem_quiescent_gpu(fd);
+ basic(fd, ALL_ENGINES, BUSY | HANG);
+ }
+ igt_subtest("hang-wait-all") {
+ gem_quiescent_gpu(fd);
+ basic(fd, ALL_ENGINES, HANG);
+ }
+
+ igt_subtest("hang-busy-write-all") {
+ gem_quiescent_gpu(fd);
+ basic(fd, ALL_ENGINES, BUSY | WRITE | HANG);
+ }
+ igt_subtest("hang-wait-write-all") {
+ gem_quiescent_gpu(fd);
+ basic(fd, ALL_ENGINES, WRITE | HANG);
+ }
+
+ for (e = intel_execution_engines; e->name; e++) {
+ igt_subtest_f("hang-busy-%s", e->name) {
+ gem_quiescent_gpu(fd);
+ basic(fd, e->exec_id | e->flags, HANG | BUSY);
+ }
+ igt_subtest_f("hang-wait-%s", e->name) {
+ gem_quiescent_gpu(fd);
+ basic(fd, e->exec_id | e->flags, HANG);
+ }
+ igt_subtest_f("hang-busy-write-%s", e->name) {
+ gem_quiescent_gpu(fd);
+ basic(fd, e->exec_id | e->flags, HANG | WRITE | BUSY);
+ }
+ igt_subtest_f("hang-wait-write-%s", e->name) {
+ gem_quiescent_gpu(fd);
+ basic(fd, e->exec_id | e->flags, HANG | WRITE);
+ }
+ }
+
+ igt_fixture {
+ igt_stop_signal_helper();
+ igt_disallow_hang(fd, hang);
+ }
+ }
+
+ igt_fixture {
+ close(fd);
+ }
+}
diff --git a/tests/i915/gem_workarounds.c b/tests/i915/gem_workarounds.c
new file mode 100644
index 00000000..78478ad2
--- /dev/null
+++ b/tests/i915/gem_workarounds.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Arun Siluvery <arun.siluvery@linux.intel.com>
+ *
+ */
+
+#include "igt.h"
+
+#include <fcntl.h>
+
+#define PAGE_SIZE 4096
+#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE)
+
+static int gen;
+
+enum operation {
+ GPU_RESET,
+ SUSPEND_RESUME,
+ HIBERNATE_RESUME,
+ SIMPLE_READ,
+};
+
+struct intel_wa_reg {
+ uint32_t addr;
+ uint32_t value;
+ uint32_t mask;
+};
+
+static struct write_only_list {
+ unsigned int gen;
+ uint32_t addr;
+} wo_list[] = {
+ { 10, 0xE5F0 } /* WaForceContextSaveRestoreNonCoherent:cnl */
+
+ /*
+ * FIXME: If you are contemplating adding stuff here
+ * consider this as a temporary solution. You need to
+ * manually check from context image that your workaround
+ * is having an effect. Consider creating a context image
+ * validator to act as a superior solution.
+ */
+};
+
+static struct intel_wa_reg *wa_regs;
+static int num_wa_regs;
+
+static bool write_only(const uint32_t addr)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(wo_list); i++) {
+ if (gen == wo_list[i].gen &&
+ addr == wo_list[i].addr) {
+ igt_info("Skipping check for 0x%x due to write only\n", addr);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+#define MI_STORE_REGISTER_MEM (0x24 << 23)
+
+static int workaround_fail_count(int fd, uint32_t ctx)
+{
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry *reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t result_sz, batch_sz;
+ uint32_t *base, *out;
+ int fail_count = 0;
+
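+ /*
+ * Build a batch of MI_STORE_REGISTER_MEM commands, one per workaround
+ * register, dumping each register into the result buffer so that its
+ * value can be checked against the expected (value & mask).
+ */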
+ reloc = calloc(num_wa_regs, sizeof(*reloc));
+ igt_assert(reloc);
+
+ result_sz = 4 * num_wa_regs;
+ result_sz = PAGE_ALIGN(result_sz);
+
+ batch_sz = 16 * num_wa_regs + 4;
+ batch_sz = PAGE_ALIGN(batch_sz);
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = gem_create(fd, result_sz);
+ gem_set_caching(fd, obj[0].handle, I915_CACHING_CACHED);
+ obj[1].handle = gem_create(fd, batch_sz);
+ obj[1].relocs_ptr = to_user_pointer(reloc);
+ obj[1].relocation_count = num_wa_regs;
+
+ out = base = gem_mmap__cpu(fd, obj[1].handle, 0, batch_sz, PROT_WRITE);
+ for (int i = 0; i < num_wa_regs; i++) {
+ *out++ = MI_STORE_REGISTER_MEM | ((gen >= 8 ? 4 : 2) - 2);
+ *out++ = wa_regs[i].addr;
+ reloc[i].target_handle = obj[0].handle;
+ reloc[i].offset = (out - base) * sizeof(*out);
+ reloc[i].delta = i * sizeof(uint32_t);
+ reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+ *out++ = reloc[i].delta;
+ if (gen >= 8)
+ *out++ = 0;
+ }
+ *out++ = MI_BATCH_BUFFER_END;
+ munmap(base, batch_sz);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.rsvd1 = ctx;
+ gem_execbuf(fd, &execbuf);
+
+ gem_set_domain(fd, obj[0].handle, I915_GEM_DOMAIN_CPU, 0);
+
+ igt_debug("Address\tval\t\tmask\t\tread\t\tresult\n");
+
+ out = gem_mmap__cpu(fd, obj[0].handle, 0, result_sz, PROT_READ);
+ for (int i = 0; i < num_wa_regs; i++) {
+ const bool ok =
+ (wa_regs[i].value & wa_regs[i].mask) ==
+ (out[i] & wa_regs[i].mask);
+ char buf[80];
+
+ snprintf(buf, sizeof(buf),
+ "0x%05X\t0x%08X\t0x%08X\t0x%08X",
+ wa_regs[i].addr, wa_regs[i].value, wa_regs[i].mask,
+ out[i]);
+
+ if (ok) {
+ igt_debug("%s\tOK\n", buf);
+ } else if (write_only(wa_regs[i].addr)) {
+ igt_debug("%s\tIGNORED (w/o)\n", buf);
+ } else {
+ igt_warn("%s\tFAIL\n", buf);
+ fail_count++;
+ }
+ }
+ munmap(out, result_sz);
+
+ gem_close(fd, obj[1].handle);
+ gem_close(fd, obj[0].handle);
+ free(reloc);
+
+ return fail_count;
+}
+
+static int reopen(int fd)
+{
+ char path[256];
+
+ snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
+ fd = open(path, O_RDWR);
+ igt_assert_lte(0, fd);
+
+ return fd;
+}
+
+#define CONTEXT 0x1
+#define FD 0x2
+static void check_workarounds(int fd, enum operation op, unsigned int flags)
+{
+ uint32_t ctx = 0;
+
+ if (flags & FD)
+ fd = reopen(fd);
+
+ if (flags & CONTEXT) {
+ gem_require_contexts(fd);
+ ctx = gem_context_create(fd);
+ }
+
+ igt_assert_eq(workaround_fail_count(fd, ctx), 0);
+
+ switch (op) {
+ case GPU_RESET:
+ igt_force_gpu_reset(fd);
+ break;
+
+ case SUSPEND_RESUME:
+ igt_system_suspend_autoresume(SUSPEND_STATE_MEM,
+ SUSPEND_TEST_NONE);
+ break;
+
+ case HIBERNATE_RESUME:
+ igt_system_suspend_autoresume(SUSPEND_STATE_DISK,
+ SUSPEND_TEST_NONE);
+ break;
+
+ case SIMPLE_READ:
+ break;
+
+ default:
+ igt_assert(0);
+ }
+
+ igt_assert_eq(workaround_fail_count(fd, ctx), 0);
+
+ if (flags & CONTEXT)
+ gem_context_destroy(fd, ctx);
+ if (flags & FD)
+ close(fd);
+}
+
+igt_main
+{
+ int device = -1;
+ const struct {
+ const char *name;
+ enum operation op;
+ } ops[] = {
+ { "basic-read", SIMPLE_READ },
+ { "reset", GPU_RESET },
+ { "suspend-resume", SUSPEND_RESUME },
+ { "hibernate-resume", HIBERNATE_RESUME },
+ { }
+ }, *op;
+ const struct {
+ const char *name;
+ unsigned int flags;
+ } modes[] = {
+ { "", 0 },
+ { "-context", CONTEXT },
+ { "-fd", FD },
+ { }
+ }, *m;
+
+ igt_fixture {
+ FILE *file;
+ char *line = NULL;
+ size_t line_size;
+ int i, fd;
+
+ device = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(device);
+
+ gen = intel_gen(intel_get_drm_devid(device));
+
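+ /*
+ * Parse the workaround list from debugfs: the first line carries the
+ * number of applied workarounds, each following line an
+ * addr/value/mask triplet.
+ */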
+ fd = igt_debugfs_open(device, "i915_wa_registers", O_RDONLY);
+ file = fdopen(fd, "r");
+ igt_assert(getline(&line, &line_size, file) > 0);
+ igt_debug("i915_wa_registers: %s", line);
+ sscanf(line, "Workarounds applied: %d", &num_wa_regs);
+ igt_require(num_wa_regs > 0);
+
+ wa_regs = malloc(num_wa_regs * sizeof(*wa_regs));
+ igt_assert(wa_regs);
+
+ i = 0;
+ while (getline(&line, &line_size, file) > 0) {
+ igt_debug("%s", line);
+ if (sscanf(line, "0x%X: 0x%08X, mask: 0x%08X",
+ &wa_regs[i].addr,
+ &wa_regs[i].value,
+ &wa_regs[i].mask) == 3)
+ i++;
+ }
+
+ igt_assert_lte(i, num_wa_regs);
+
+ free(line);
+ fclose(file);
+ close(fd);
+ }
+
+ for (op = ops; op->name; op++) {
+ for (m = modes; m->name; m++) {
+ igt_subtest_f("%s%s", op->name, m->name)
+ check_workarounds(device, op->op, m->flags);
+ }
+ }
+}
diff --git a/tests/i915/gem_write_read_ring_switch.c b/tests/i915/gem_write_read_ring_switch.c
new file mode 100644
index 00000000..ef229cc5
--- /dev/null
+++ b/tests/i915/gem_write_read_ring_switch.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "i830_reg.h"
+
+IGT_TEST_DESCRIPTION("Check read/write syncpoints when switching rings.");
+
+#define LOCAL_I915_EXEC_VEBOX (4<<0)
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static drm_intel_bo *load_bo, *target_bo, *dummy_bo;
+int fd;
+
+/* Testcase: check read/write syncpoints when switching rings
+ *
+ * We've had a bug where the syncpoint for the last write was mangled after a
+ * ring switch using semaphores. This resulted in cpu reads returning before the
+ * write actually completed. This test exercises this.
+ */
+
+#define COLOR 0xffffffff
+static void run_test(int ring)
+{
+ uint32_t *ptr;
+ int i;
+
+ gem_require_ring(fd, ring);
+ /* Testing render only makes sense with separate blt. */
+ if (ring == I915_EXEC_RENDER)
+ gem_require_ring(fd, I915_EXEC_BLT);
+
+ target_bo = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
+ igt_assert(target_bo);
+
+ /* Need to map first so that we can do our own domain management with
+ * set_domain. */
+ drm_intel_bo_map(target_bo, 0);
+ ptr = target_bo->virtual;
+ igt_assert(*ptr == 0);
+
+ /* put some load onto the gpu to keep the light buffers active for long
+ * enough */
+ for (i = 0; i < 1000; i++) {
+ BLIT_COPY_BATCH_START(0);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ 4096);
+ OUT_BATCH(0); /* dst x1,y1 */
+ OUT_BATCH((1024 << 16) | 512);
+ OUT_RELOC_FENCED(load_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH((0 << 16) | 512); /* src x1, y1 */
+ OUT_BATCH(4096);
+ OUT_RELOC_FENCED(load_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+ }
+
+ COLOR_BLIT_COPY_BATCH_START(0);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xff << 16) |
+ 128);
+ OUT_BATCH(0); /* dst x1,y1 */
+ OUT_BATCH((1 << 16) | 1);
+ OUT_RELOC_FENCED(target_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(COLOR);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_flush(batch);
+
+ /* Emit an empty batch so that signalled seqno on the target ring >
+ * signalled seqno on the blt ring. This is required to hit the bug. */
+ BEGIN_BATCH(2, 0);
+ OUT_BATCH(MI_NOOP);
+ OUT_BATCH(MI_NOOP);
+ ADVANCE_BATCH();
+ intel_batchbuffer_flush_on_ring(batch, ring);
+
+ /* For the ring->ring sync it's important to only emit a read reloc, as
+ * otherwise the obj->last_write_seqno would be updated. */
+ if (ring == I915_EXEC_RENDER) {
+ BEGIN_BATCH(4, 1);
+ OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
+ OUT_BATCH(0xffffffff); /* compare dword */
+ OUT_RELOC(target_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ OUT_BATCH(MI_NOOP);
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(4, 1);
+ OUT_BATCH(MI_FLUSH_DW | 1);
+ OUT_BATCH(0); /* reserved */
+ OUT_RELOC(target_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ OUT_BATCH(MI_NOOP | (1<<22) | (0xf));
+ ADVANCE_BATCH();
+ }
+ intel_batchbuffer_flush_on_ring(batch, ring);
+
+ gem_set_domain(fd, target_bo->handle, I915_GEM_DOMAIN_GTT, 0);
+ igt_assert(*ptr == COLOR);
+ drm_intel_bo_unmap(target_bo);
+
+ drm_intel_bo_unreference(target_bo);
+}
+
+igt_main
+{
+ static const struct {
+ const char *name;
+ int ring;
+ } tests[] = {
+ { "blt2render", I915_EXEC_RENDER },
+ { "blt2bsd", I915_EXEC_BSD },
+ { "blt2vebox", LOCAL_I915_EXEC_VEBOX },
+ };
+ int i;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(fd);
+
+ /* Test requires MI_FLUSH_DW and MI_COND_BATCH_BUFFER_END */
+ igt_require(intel_gen(intel_get_drm_devid(fd)) >= 6);
+
+ bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+ igt_assert(bufmgr);
+ /* don't enable buffer reuse!! */
+ //drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+ batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+ igt_assert(batch);
+
+ dummy_bo = drm_intel_bo_alloc(bufmgr, "dummy bo", 4096, 4096);
+ igt_assert(dummy_bo);
+
+ load_bo = drm_intel_bo_alloc(bufmgr, "load bo", 1024*4096, 4096);
+ igt_assert(load_bo);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ igt_subtest(tests[i].name)
+ run_test(tests[i].ring);
+ }
+
+ igt_fork_signal_helper();
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ igt_subtest_f("%s-interruptible", tests[i].name)
+ run_test(tests[i].ring);
+ }
+ igt_stop_signal_helper();
+
+ igt_fixture {
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(fd);
+ }
+}
diff --git a/tests/i915/gen3_mixed_blits.c b/tests/i915/gen3_mixed_blits.c
new file mode 100644
index 00000000..948f4e6a
--- /dev/null
+++ b/tests/i915/gen3_mixed_blits.c
@@ -0,0 +1,521 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/** @file gen3_mixed_blits.c
+ *
+ * This is a test of doing many blits, with a working set
+ * larger than the aperture size.
+ *
+ * The goal is to simply ensure the basics work.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+#include "i915_reg.h"
+
+#define WIDTH (512)
+#define HEIGHT (512)
+
+static inline uint32_t pack_float(float f)
+{
+ union {
+ uint32_t dw;
+ float f;
+ } u;
+ u.f = f;
+ return u.dw;
+}
+
+static uint32_t fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
+ uint32_t offset,
+ uint32_t handle,
+ uint32_t read_domain,
+ uint32_t write_domain)
+{
+ reloc->target_handle = handle;
+ reloc->delta = 0;
+ reloc->offset = offset * sizeof(uint32_t);
+ reloc->presumed_offset = 0;
+ reloc->read_domains = read_domain;
+ reloc->write_domain = write_domain;
+
+ return reloc->presumed_offset + reloc->delta;
+}
+
+static void
+render_copy(int fd,
+ uint32_t dst, int dst_tiling,
+ uint32_t src, int src_tiling,
+ int use_fence)
+{
+ uint32_t batch[1024], *b = batch;
+ struct drm_i915_gem_relocation_entry reloc[2], *r = reloc;
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_execbuffer2 exec;
+ uint32_t handle;
+ uint32_t tiling_bits;
+
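+ /*
+ * Hand-roll a gen3 3D batch: invariant state, a sampler mapping the
+ * source, the destination bound as render target, a pass-through pixel
+ * shader and a full-surface RECTLIST to copy src into dst.
+ */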
+ /* invariant state */
+ *b++ = (_3DSTATE_AA_CMD |
+ AA_LINE_ECAAR_WIDTH_ENABLE |
+ AA_LINE_ECAAR_WIDTH_1_0 |
+ AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
+ *b++ = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
+ IAB_MODIFY_ENABLE |
+ IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) |
+ IAB_MODIFY_SRC_FACTOR |
+ (BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT) |
+ IAB_MODIFY_DST_FACTOR |
+ (BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT));
+ *b++ = (_3DSTATE_DFLT_DIFFUSE_CMD);
+ *b++ = (0);
+ *b++ = (_3DSTATE_DFLT_SPEC_CMD);
+ *b++ = (0);
+ *b++ = (_3DSTATE_DFLT_Z_CMD);
+ *b++ = (0);
+ *b++ = (_3DSTATE_COORD_SET_BINDINGS |
+ CSB_TCB(0, 0) |
+ CSB_TCB(1, 1) |
+ CSB_TCB(2, 2) |
+ CSB_TCB(3, 3) |
+ CSB_TCB(4, 4) |
+ CSB_TCB(5, 5) |
+ CSB_TCB(6, 6) |
+ CSB_TCB(7, 7));
+ *b++ = (_3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) |
+ ENABLE_TEXKILL_3D_4D |
+ TEXKILL_4D);
+ *b++ = (_3DSTATE_MODES_4_CMD |
+ ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
+ ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
+ ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
+ *b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
+ *b++ = (0x00000000); /* Disable texture coordinate wrap-shortest */
+ *b++ = ((1 << S4_POINT_WIDTH_SHIFT) |
+ S4_LINE_WIDTH_ONE |
+ S4_CULLMODE_NONE |
+ S4_VFMT_XY);
+ *b++ = (0x00000000); /* Stencil. */
+ *b++ = (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+ *b++ = (_3DSTATE_SCISSOR_RECT_0_CMD);
+ *b++ = (0);
+ *b++ = (0);
+ *b++ = (_3DSTATE_DEPTH_SUBRECT_DISABLE);
+ *b++ = (_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */
+ *b++ = (0);
+ *b++ = (_3DSTATE_STIPPLE);
+ *b++ = (0x00000000);
+ *b++ = (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
+
+ /* sampler state */
+ if (use_fence) {
+ tiling_bits = MS3_USE_FENCE_REGS;
+ } else {
+ tiling_bits = 0;
+ if (src_tiling != I915_TILING_NONE)
+ tiling_bits = MS3_TILED_SURFACE;
+ if (src_tiling == I915_TILING_Y)
+ tiling_bits |= MS3_TILE_WALK;
+ }
+
+#define TEX_COUNT 1
+ *b++ = (_3DSTATE_MAP_STATE | (3 * TEX_COUNT));
+ *b++ = ((1 << TEX_COUNT) - 1);
+ *b = fill_reloc(r++, b-batch, src, I915_GEM_DOMAIN_SAMPLER, 0); b++;
+ *b++ = (MAPSURF_32BIT | MT_32BIT_ARGB8888 | tiling_bits |
+ (HEIGHT - 1) << MS3_HEIGHT_SHIFT |
+ (WIDTH - 1) << MS3_WIDTH_SHIFT);
+ *b++ = ((WIDTH-1) << MS4_PITCH_SHIFT);
+
+ *b++ = (_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT));
+ *b++ = ((1 << TEX_COUNT) - 1);
+ *b++ = (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT |
+ FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
+ FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
+ *b++ = (TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT |
+ TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT |
+ 0 << SS3_TEXTUREMAP_INDEX_SHIFT);
+ *b++ = (0x00000000);
+
+ /* render target state */
+ if (use_fence) {
+ tiling_bits = BUF_3D_USE_FENCE;
+ } else {
+ tiling_bits = 0;
+ if (dst_tiling != I915_TILING_NONE)
+ tiling_bits = BUF_3D_TILED_SURFACE;
+ if (dst_tiling == I915_TILING_Y)
+ tiling_bits |= BUF_3D_TILE_WALK_Y;
+ }
+ *b++ = (_3DSTATE_BUF_INFO_CMD);
+ *b++ = (BUF_3D_ID_COLOR_BACK | tiling_bits | WIDTH*4);
+ *b = fill_reloc(r++, b-batch, dst,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
+ b++;
+
+ *b++ = (_3DSTATE_DST_BUF_VARS_CMD);
+ *b++ = (COLR_BUF_ARGB8888 |
+ DSTORG_HORT_BIAS(0x8) |
+ DSTORG_VERT_BIAS(0x8));
+
+ /* draw rect is unconditional */
+ *b++ = (_3DSTATE_DRAW_RECT_CMD);
+ *b++ = (0x00000000);
+ *b++ = (0x00000000); /* ymin, xmin */
+ *b++ = (DRAW_YMAX(HEIGHT - 1) |
+ DRAW_XMAX(WIDTH - 1));
+ /* yorig, xorig (relate to color buffer?) */
+ *b++ = (0x00000000);
+
+ /* texfmt */
+ *b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) | 2);
+ *b++ = ((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
+ *b++ = (~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D));
+ *b++ = (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
+ BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
+ BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT |
+ BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT);
+
+ /* pixel shader */
+ *b++ = (_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2));
+ /* decl FS_T0 */
+ *b++ = (D0_DCL |
+ REG_TYPE(FS_T0) << D0_TYPE_SHIFT |
+ REG_NR(FS_T0) << D0_NR_SHIFT |
+ ((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+ *b++ = (0);
+ *b++ = (0);
+ /* decl FS_S0 */
+ *b++ = (D0_DCL |
+ (REG_TYPE(FS_S0) << D0_TYPE_SHIFT) |
+ (REG_NR(FS_S0) << D0_NR_SHIFT) |
+ ((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+ *b++ = (0);
+ *b++ = (0);
+ /* texld(FS_OC, FS_S0, FS_T0) */
+ *b++ = (T0_TEXLD |
+ (REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) |
+ (REG_NR(FS_OC) << T0_DEST_NR_SHIFT) |
+ (REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT));
+ *b++ = ((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) |
+ (REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT));
+ *b++ = (0);
+
+ *b++ = (PRIM3D_RECTLIST | (3*4 - 1));
+ *b++ = pack_float(WIDTH);
+ *b++ = pack_float(HEIGHT);
+ *b++ = pack_float(WIDTH);
+ *b++ = pack_float(HEIGHT);
+
+ *b++ = pack_float(0);
+ *b++ = pack_float(HEIGHT);
+ *b++ = pack_float(0);
+ *b++ = pack_float(HEIGHT);
+
+ *b++ = pack_float(0);
+ *b++ = pack_float(0);
+ *b++ = pack_float(0);
+ *b++ = pack_float(0);
+
+ *b++ = MI_BATCH_BUFFER_END;
+ if ((b - batch) & 1)
+ *b++ = 0;
+
+ igt_assert(b - batch <= 1024);
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, batch, (b-batch)*sizeof(batch[0]));
+
+ igt_assert(r-reloc == 2);
+
+ tiling_bits = 0;
+ if (use_fence)
+ tiling_bits = EXEC_OBJECT_NEEDS_FENCE;
+
+ obj[0].handle = dst;
+ obj[0].relocation_count = 0;
+ obj[0].relocs_ptr = 0;
+ obj[0].alignment = 0;
+ obj[0].offset = 0;
+ obj[0].flags = tiling_bits;
+ obj[0].rsvd1 = 0;
+ obj[0].rsvd2 = 0;
+
+ obj[1].handle = src;
+ obj[1].relocation_count = 0;
+ obj[1].relocs_ptr = 0;
+ obj[1].alignment = 0;
+ obj[1].offset = 0;
+ obj[1].flags = tiling_bits;
+ obj[1].rsvd1 = 0;
+ obj[1].rsvd2 = 0;
+
+ obj[2].handle = handle;
+ obj[2].relocation_count = 2;
+ obj[2].relocs_ptr = (uintptr_t)reloc;
+ obj[2].alignment = 0;
+ obj[2].offset = 0;
+ obj[2].flags = 0;
+ obj[2].rsvd1 = obj[2].rsvd2 = 0;
+
+ exec.buffers_ptr = (uintptr_t)obj;
+ exec.buffer_count = 3;
+ exec.batch_start_offset = 0;
+ exec.batch_len = (b-batch)*sizeof(batch[0]);
+ exec.DR1 = exec.DR4 = 0;
+ exec.num_cliprects = 0;
+ exec.cliprects_ptr = 0;
+ exec.flags = 0;
+ i915_execbuffer2_set_context_id(exec, 0);
+ exec.rsvd2 = 0;
+
+ gem_execbuf(fd, &exec);
+
+ gem_close(fd, handle);
+}
+
+static void blt_copy(int fd, uint32_t dst, uint32_t src)
+{
+ uint32_t batch[1024], *b = batch;
+ struct drm_i915_gem_relocation_entry reloc[2], *r = reloc;
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_execbuffer2 exec;
+ uint32_t handle;
+
+ *b++ = (XY_SRC_COPY_BLT_CMD |
+ XY_SRC_COPY_BLT_WRITE_ALPHA |
+ XY_SRC_COPY_BLT_WRITE_RGB | 6);
+ *b++ = 3 << 24 | 0xcc << 16 | WIDTH * 4;
+ *b++ = 0;
+ *b++ = HEIGHT << 16 | WIDTH;
+ *b = fill_reloc(r++, b-batch, dst,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); b++;
+ *b++ = 0;
+ *b++ = WIDTH*4;
+ *b = fill_reloc(r++, b-batch, src, I915_GEM_DOMAIN_RENDER, 0); b++;
+
+ *b++ = MI_BATCH_BUFFER_END;
+ if ((b - batch) & 1)
+ *b++ = 0;
+
+ igt_assert(b - batch <= 1024);
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, batch, (b-batch)*sizeof(batch[0]));
+
+ igt_assert(r-reloc == 2);
+
+ obj[0].handle = dst;
+ obj[0].relocation_count = 0;
+ obj[0].relocs_ptr = 0;
+ obj[0].alignment = 0;
+ obj[0].offset = 0;
+ obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
+ obj[0].rsvd1 = 0;
+ obj[0].rsvd2 = 0;
+
+ obj[1].handle = src;
+ obj[1].relocation_count = 0;
+ obj[1].relocs_ptr = 0;
+ obj[1].alignment = 0;
+ obj[1].offset = 0;
+ obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
+ obj[1].rsvd1 = 0;
+ obj[1].rsvd2 = 0;
+
+ obj[2].handle = handle;
+ obj[2].relocation_count = 2;
+ obj[2].relocs_ptr = (uintptr_t)reloc;
+ obj[2].alignment = 0;
+ obj[2].offset = 0;
+ obj[2].flags = 0;
+ obj[2].rsvd1 = obj[2].rsvd2 = 0;
+
+ exec.buffers_ptr = (uintptr_t)obj;
+ exec.buffer_count = 3;
+ exec.batch_start_offset = 0;
+ exec.batch_len = (b-batch)*sizeof(batch[0]);
+ exec.DR1 = exec.DR4 = 0;
+ exec.num_cliprects = 0;
+ exec.cliprects_ptr = 0;
+ exec.flags = 0;
+ i915_execbuffer2_set_context_id(exec, 0);
+ exec.rsvd2 = 0;
+
+ gem_execbuf(fd, &exec);
+
+ gem_close(fd, handle);
+}
+
+
+static void
+copy(int fd,
+ uint32_t dst, int dst_tiling,
+ uint32_t src, int src_tiling)
+{
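+ /*
+ * Randomly pick between the 3D engine (with or without fence registers)
+ * and the blitter; the blitter path is skipped for Y-tiled buffers.
+ */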
+retry:
+ switch (random() % 3) {
+ case 0: render_copy(fd, dst, dst_tiling, src, src_tiling, 0); break;
+ case 1: render_copy(fd, dst, dst_tiling, src, src_tiling, 1); break;
+ case 2: if (dst_tiling == I915_TILING_Y || src_tiling == I915_TILING_Y)
+ goto retry;
+ blt_copy(fd, dst, src);
+ break;
+ }
+}
+
+static uint32_t
+create_bo(int fd, uint32_t val, int tiling)
+{
+ uint32_t handle;
+ uint32_t *v;
+ int i;
+
+ handle = gem_create(fd, WIDTH*HEIGHT*4);
+ gem_set_tiling(fd, handle, tiling, WIDTH*4);
+
+ /* Fill the BO with dwords starting at val */
+ v = gem_mmap__gtt(fd, handle, WIDTH * HEIGHT * 4,
+ PROT_READ | PROT_WRITE);
+ for (i = 0; i < WIDTH*HEIGHT; i++)
+ v[i] = val++;
+ munmap(v, WIDTH*HEIGHT*4);
+
+ return handle;
+}
+
+static void
+check_bo(int fd, uint32_t handle, uint32_t val)
+{
+ uint32_t *v;
+ int i;
+
+ v = gem_mmap__gtt(fd, handle, WIDTH * HEIGHT * 4, PROT_READ);
+ for (i = 0; i < WIDTH*HEIGHT; i++) {
+ igt_assert_f(v[i] == val,
+ "Expected 0x%08x, found 0x%08x "
+ "at offset 0x%08x\n",
+ val, v[i], i * 4);
+ val++;
+ }
+ munmap(v, WIDTH*HEIGHT*4);
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t *handle, *tiling, *start_val;
+ uint32_t start = 0;
+ int i, fd, count;
+
+ igt_simple_init(argc, argv);
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ igt_require(IS_GEN3(intel_get_drm_devid(fd)));
+
+ count = 0;
+ if (argc > 1)
+ count = atoi(argv[1]);
+ if (count == 0)
+ count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
+ igt_info("Using %d 1MiB buffers\n", count);
+ intel_require_memory(count, 1024*1024, CHECK_RAM);
+
+ handle = malloc(sizeof(uint32_t)*count*3);
+ tiling = handle + count;
+ start_val = tiling + count;
+
+ for (i = 0; i < count; i++) {
+ handle[i] = create_bo(fd, start, tiling[i] = i % 3);
+ start_val[i] = start;
+ start += 1024 * 1024 / 4;
+ }
+
+ igt_info("Verifying initialisation..."); fflush(stdout);
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+ igt_info("done\n");
+
+ igt_info("Cyclic blits, forward..."); fflush(stdout);
+ for (i = 0; i < count * 32; i++) {
+ int src = i % count;
+ int dst = (i + 1) % count;
+
+ copy(fd, handle[dst], tiling[dst], handle[src], tiling[src]);
+ start_val[dst] = start_val[src];
+ }
+ igt_info("verifying..."); fflush(stdout);
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+ igt_info("done\n");
+
+ igt_info("Cyclic blits, backward..."); fflush(stdout);
+ for (i = 0; i < count * 32; i++) {
+ int src = (i + 1) % count;
+ int dst = i % count;
+
+ copy(fd, handle[dst], tiling[dst], handle[src], tiling[src]);
+ start_val[dst] = start_val[src];
+ }
+ igt_info("verifying..."); fflush(stdout);
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+ igt_info("done\n");
+
+ igt_info("Random blits..."); fflush(stdout);
+ for (i = 0; i < count * 32; i++) {
+ int src = random() % count;
+ int dst = random() % count;
+
+ while (src == dst)
+ dst = random() % count;
+
+ copy(fd, handle[dst], tiling[dst], handle[src], tiling[src]);
+ start_val[dst] = start_val[src];
+ }
+ igt_info("verifying..."); fflush(stdout);
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+ igt_info("done\n");
+
+ igt_exit();
+}
diff --git a/tests/i915/gen3_render_linear_blits.c b/tests/i915/gen3_render_linear_blits.c
new file mode 100644
index 00000000..9d1499a5
--- /dev/null
+++ b/tests/i915/gen3_render_linear_blits.c
@@ -0,0 +1,388 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/** @file gen3_render_linear_blits.c
+ *
+ * This is a test of doing many blits, with a working set
+ * larger than the aperture size.
+ *
+ * The goal is to simply ensure the basics work.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+
+#include "i915_reg.h"
+
+#define WIDTH 512
+#define HEIGHT 512
+
+static uint32_t linear[WIDTH*HEIGHT];
+
+static inline uint32_t pack_float(float f)
+{
+ union {
+ uint32_t dw;
+ float f;
+ } u;
+ u.f = f;
+ return u.dw;
+}
+
+static uint32_t fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
+ uint32_t offset,
+ uint32_t handle,
+ uint32_t read_domain,
+ uint32_t write_domain)
+{
+ reloc->target_handle = handle;
+ reloc->delta = 0;
+ reloc->offset = offset * sizeof(uint32_t);
+ reloc->presumed_offset = 0;
+ reloc->read_domains = read_domain;
+ reloc->write_domain = write_domain;
+
+ return reloc->presumed_offset + reloc->delta;
+}
+
+static void
+copy(int fd, uint32_t dst, uint32_t src)
+{
+ uint32_t batch[1024], *b = batch;
+ struct drm_i915_gem_relocation_entry reloc[2], *r = reloc;
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_execbuffer2 exec;
+ uint32_t handle;
+
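+ /*
+ * Emit a full gen3 3D batch that samples from src and draws a
+ * full-surface RECTLIST into dst, copying the contents.
+ */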
+ /* invariant state */
+ *b++ = (_3DSTATE_AA_CMD |
+ AA_LINE_ECAAR_WIDTH_ENABLE |
+ AA_LINE_ECAAR_WIDTH_1_0 |
+ AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
+ *b++ = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
+ IAB_MODIFY_ENABLE |
+ IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) |
+ IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE <<
+ IAB_SRC_FACTOR_SHIFT) |
+ IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO <<
+ IAB_DST_FACTOR_SHIFT));
+ *b++ = (_3DSTATE_DFLT_DIFFUSE_CMD);
+ *b++ = (0);
+ *b++ = (_3DSTATE_DFLT_SPEC_CMD);
+ *b++ = (0);
+ *b++ = (_3DSTATE_DFLT_Z_CMD);
+ *b++ = (0);
+ *b++ = (_3DSTATE_COORD_SET_BINDINGS |
+ CSB_TCB(0, 0) |
+ CSB_TCB(1, 1) |
+ CSB_TCB(2, 2) |
+ CSB_TCB(3, 3) |
+ CSB_TCB(4, 4) |
+ CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
+ *b++ = (_3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
+ *b++ = (_3DSTATE_MODES_4_CMD |
+ ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
+ ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
+ ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
+ *b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
+ *b++ = (0x00000000); /* Disable texture coordinate wrap-shortest */
+ *b++ = ((1 << S4_POINT_WIDTH_SHIFT) |
+ S4_LINE_WIDTH_ONE |
+ S4_CULLMODE_NONE |
+ S4_VFMT_XY);
+ *b++ = (0x00000000); /* Stencil. */
+ *b++ = (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+ *b++ = (_3DSTATE_SCISSOR_RECT_0_CMD);
+ *b++ = (0);
+ *b++ = (0);
+ *b++ = (_3DSTATE_DEPTH_SUBRECT_DISABLE);
+ *b++ = (_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */
+ *b++ = (0);
+ *b++ = (_3DSTATE_STIPPLE);
+ *b++ = (0x00000000);
+ *b++ = (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
+
+ /* sampler state */
+#define TEX_COUNT 1
+ *b++ = (_3DSTATE_MAP_STATE | (3 * TEX_COUNT));
+ *b++ = ((1 << TEX_COUNT) - 1);
+ *b = fill_reloc(r++, b-batch, src, I915_GEM_DOMAIN_SAMPLER, 0); b++;
+ *b++ = (MAPSURF_32BIT | MT_32BIT_ARGB8888 |
+ (HEIGHT - 1) << MS3_HEIGHT_SHIFT |
+ (WIDTH - 1) << MS3_WIDTH_SHIFT);
+ *b++ = ((WIDTH-1) << MS4_PITCH_SHIFT);
+
+ *b++ = (_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT));
+ *b++ = ((1 << TEX_COUNT) - 1);
+ *b++ = (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT |
+ FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
+ FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
+ *b++ = (TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT |
+ TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT |
+ 0 << SS3_TEXTUREMAP_INDEX_SHIFT);
+ *b++ = (0x00000000);
+
+ /* render target state */
+ *b++ = (_3DSTATE_BUF_INFO_CMD);
+ *b++ = (BUF_3D_ID_COLOR_BACK | WIDTH*4);
+ *b = fill_reloc(r++, b-batch, dst,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
+ b++;
+
+ *b++ = (_3DSTATE_DST_BUF_VARS_CMD);
+ *b++ = (COLR_BUF_ARGB8888 |
+ DSTORG_HORT_BIAS(0x8) |
+ DSTORG_VERT_BIAS(0x8));
+
+ /* draw rect is unconditional */
+ *b++ = (_3DSTATE_DRAW_RECT_CMD);
+ *b++ = (0x00000000);
+ *b++ = (0x00000000); /* ymin, xmin */
+ *b++ = (DRAW_YMAX(HEIGHT - 1) |
+ DRAW_XMAX(WIDTH - 1));
+ /* yorig, xorig (relate to color buffer?) */
+ *b++ = (0x00000000);
+
+ /* texfmt */
+ *b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) | 2);
+ *b++ = ((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
+ *b++ = (~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D));
+ *b++ = (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
+ BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
+ BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT |
+ BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT);
+
+ /* pixel shader */
+ *b++ = (_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2));
+ /* decl FS_T0 */
+ *b++ = (D0_DCL |
+ REG_TYPE(FS_T0) << D0_TYPE_SHIFT |
+ REG_NR(FS_T0) << D0_NR_SHIFT |
+ ((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+ *b++ = (0);
+ *b++ = (0);
+ /* decl FS_S0 */
+ *b++ = (D0_DCL |
+ (REG_TYPE(FS_S0) << D0_TYPE_SHIFT) |
+ (REG_NR(FS_S0) << D0_NR_SHIFT) |
+ ((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+ *b++ = (0);
+ *b++ = (0);
+ /* texld(FS_OC, FS_S0, FS_T0) */
+ *b++ = (T0_TEXLD |
+ (REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) |
+ (REG_NR(FS_OC) << T0_DEST_NR_SHIFT) |
+ (REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT));
+ *b++ = ((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) |
+ (REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT));
+ *b++ = (0);
+
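+ /* one full-surface rectangle: 3 vertices of x, y, s, t to match the 4-dword vertex format programmed above */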
+ *b++ = (PRIM3D_RECTLIST | (3*4 - 1));
+ *b++ = pack_float(WIDTH);
+ *b++ = pack_float(HEIGHT);
+ *b++ = pack_float(WIDTH);
+ *b++ = pack_float(HEIGHT);
+
+ *b++ = pack_float(0);
+ *b++ = pack_float(HEIGHT);
+ *b++ = pack_float(0);
+ *b++ = pack_float(HEIGHT);
+
+ *b++ = pack_float(0);
+ *b++ = pack_float(0);
+ *b++ = pack_float(0);
+ *b++ = pack_float(0);
+
+ *b++ = MI_BATCH_BUFFER_END;
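+ /* pad the batch to an even number of dwords */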
+ if ((b - batch) & 1)
+ *b++ = 0;
+
+ igt_assert(b - batch <= 1024);
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, batch, (b-batch)*sizeof(batch[0]));
+
+ igt_assert(r-reloc == 2);
+
+ obj[0].handle = dst;
+ obj[0].relocation_count = 0;
+ obj[0].relocs_ptr = 0;
+ obj[0].alignment = 0;
+ obj[0].offset = 0;
+ obj[0].flags = 0;
+ obj[0].rsvd1 = 0;
+ obj[0].rsvd2 = 0;
+
+ obj[1].handle = src;
+ obj[1].relocation_count = 0;
+ obj[1].relocs_ptr = 0;
+ obj[1].alignment = 0;
+ obj[1].offset = 0;
+ obj[1].flags = 0;
+ obj[1].rsvd1 = 0;
+ obj[1].rsvd2 = 0;
+
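+ /* the batch object must come last in the execbuffer2 object list */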
+ obj[2].handle = handle;
+ obj[2].relocation_count = 2;
+ obj[2].relocs_ptr = (uintptr_t)reloc;
+ obj[2].alignment = 0;
+ obj[2].offset = 0;
+ obj[2].flags = 0;
+ obj[2].rsvd1 = obj[2].rsvd2 = 0;
+
+ exec.buffers_ptr = (uintptr_t)obj;
+ exec.buffer_count = 3;
+ exec.batch_start_offset = 0;
+ exec.batch_len = (b-batch)*sizeof(batch[0]);
+ exec.DR1 = exec.DR4 = 0;
+ exec.num_cliprects = 0;
+ exec.cliprects_ptr = 0;
+ exec.flags = 0;
+ i915_execbuffer2_set_context_id(exec, 0);
+ exec.rsvd2 = 0;
+
+ gem_execbuf(fd, &exec);
+
+ gem_close(fd, handle);
+}
+
+static uint32_t
+create_bo(int fd, uint32_t val)
+{
+ uint32_t handle;
+ int i;
+
+ handle = gem_create(fd, sizeof(linear));
+
+ /* Fill the BO with dwords starting at val */
+ for (i = 0; i < WIDTH*HEIGHT; i++)
+ linear[i] = val++;
+ gem_write(fd, handle, 0, linear, sizeof(linear));
+
+ return handle;
+}
+
+static void
+check_bo(int fd, uint32_t handle, uint32_t val)
+{
+ int i;
+
+ gem_read(fd, handle, 0, linear, sizeof(linear));
+ for (i = 0; i < WIDTH*HEIGHT; i++) {
+ igt_assert_f(linear[i] == val,
+ "Expected 0x%08x, found 0x%08x "
+ "at offset 0x%08x\n",
+ val, linear[i], i * 4);
+ val++;
+ }
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t *handle, *start_val;
+ uint32_t start = 0;
+ int i, fd, count;
+
+ igt_simple_init(argc, argv);
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ igt_require(IS_GEN3(intel_get_drm_devid(fd)));
+
+ count = 0;
+ if (argc > 1)
+ count = atoi(argv[1]);
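+ /* default to a working set of 1.5x the aperture size, in 1MiB buffers */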
+ if (count == 0)
+ count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
+ igt_info("Using %d 1MiB buffers\n", count);
+ intel_require_memory(count, 1024*1024, CHECK_RAM);
+
+ handle = malloc(sizeof(uint32_t)*count*2);
+ start_val = handle + count;
+
+ for (i = 0; i < count; i++) {
+ handle[i] = create_bo(fd, start);
+ start_val[i] = start;
+ start += 1024 * 1024 / 4;
+ }
+
+ igt_info("Verifying initialisation...\n");
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+
+ igt_info("Cyclic blits, forward...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = i % count;
+ int dst = (i + 1) % count;
+
+ copy(fd, handle[dst], handle[src]);
+ start_val[dst] = start_val[src];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+
+ igt_info("Cyclic blits, backward...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = (i + 1) % count;
+ int dst = i % count;
+
+ copy(fd, handle[dst], handle[src]);
+ start_val[dst] = start_val[src];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+
+ igt_info("Random blits...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = random() % count;
+ int dst = random() % count;
+
+ if (src == dst)
+ continue;
+
+ copy(fd, handle[dst], handle[src]);
+ start_val[dst] = start_val[src];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+
+ igt_exit();
+}
diff --git a/tests/i915/gen3_render_mixed_blits.c b/tests/i915/gen3_render_mixed_blits.c
new file mode 100644
index 00000000..afb53a59
--- /dev/null
+++ b/tests/i915/gen3_render_mixed_blits.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/** @file gen3_render_mixed_blits.c
+ *
+ * This is a test of doing many blits between buffers of mixed (linear,
+ * X and Y) tiling, with a working set larger than the aperture size.
+ *
+ * The goal is simply to ensure the basics work.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+#include "i915_reg.h"
+
+#define WIDTH 512
+#define HEIGHT 512
+
+static inline uint32_t pack_float(float f)
+{
+ union {
+ uint32_t dw;
+ float f;
+ } u;
+ u.f = f;
+ return u.dw;
+}
+
+static uint32_t fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
+ uint32_t offset,
+ uint32_t handle,
+ uint32_t read_domain,
+ uint32_t write_domain)
+{
+ reloc->target_handle = handle;
+ reloc->delta = 0;
+ reloc->offset = offset * sizeof(uint32_t);
+ reloc->presumed_offset = 0;
+ reloc->read_domains = read_domain;
+ reloc->write_domain = write_domain;
+
+ return reloc->presumed_offset + reloc->delta;
+}
+
+static void
+copy(int fd,
+ uint32_t dst, int dst_tiling,
+ uint32_t src, int src_tiling)
+{
+ uint32_t batch[1024], *b = batch;
+ struct drm_i915_gem_relocation_entry reloc[2], *r = reloc;
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_execbuffer2 exec;
+ uint32_t handle;
+ uint32_t tiling_bits;
+
+ /* invariant state */
+ *b++ = (_3DSTATE_AA_CMD |
+ AA_LINE_ECAAR_WIDTH_ENABLE |
+ AA_LINE_ECAAR_WIDTH_1_0 |
+ AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
+ *b++ = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
+ IAB_MODIFY_ENABLE |
+ IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) |
+ IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE <<
+ IAB_SRC_FACTOR_SHIFT) |
+ IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO <<
+ IAB_DST_FACTOR_SHIFT));
+ *b++ = (_3DSTATE_DFLT_DIFFUSE_CMD);
+ *b++ = (0);
+ *b++ = (_3DSTATE_DFLT_SPEC_CMD);
+ *b++ = (0);
+ *b++ = (_3DSTATE_DFLT_Z_CMD);
+ *b++ = (0);
+ *b++ = (_3DSTATE_COORD_SET_BINDINGS |
+ CSB_TCB(0, 0) |
+ CSB_TCB(1, 1) |
+ CSB_TCB(2, 2) |
+ CSB_TCB(3, 3) |
+ CSB_TCB(4, 4) |
+ CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
+ *b++ = (_3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
+ *b++ = (_3DSTATE_MODES_4_CMD |
+ ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
+ ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
+ ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
+ *b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
+ *b++ = (0x00000000); /* Disable texture coordinate wrap-shortest */
+ *b++ = ((1 << S4_POINT_WIDTH_SHIFT) |
+ S4_LINE_WIDTH_ONE |
+ S4_CULLMODE_NONE |
+ S4_VFMT_XY);
+ *b++ = (0x00000000); /* Stencil. */
+ *b++ = (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+ *b++ = (_3DSTATE_SCISSOR_RECT_0_CMD);
+ *b++ = (0);
+ *b++ = (0);
+ *b++ = (_3DSTATE_DEPTH_SUBRECT_DISABLE);
+ *b++ = (_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */
+ *b++ = (0);
+ *b++ = (_3DSTATE_STIPPLE);
+ *b++ = (0x00000000);
+ *b++ = (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
+
+ /* sampler state */
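+ /* translate the source tiling into MS3 map bits: tiled surface, plus a Y-major walk for Y tiling */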
+ tiling_bits = 0;
+ if (src_tiling != I915_TILING_NONE)
+ tiling_bits = MS3_TILED_SURFACE;
+ if (src_tiling == I915_TILING_Y)
+ tiling_bits |= MS3_TILE_WALK;
+
+#define TEX_COUNT 1
+ *b++ = (_3DSTATE_MAP_STATE | (3 * TEX_COUNT));
+ *b++ = ((1 << TEX_COUNT) - 1);
+ *b = fill_reloc(r++, b-batch, src, I915_GEM_DOMAIN_SAMPLER, 0); b++;
+ *b++ = (MAPSURF_32BIT | MT_32BIT_ARGB8888 | tiling_bits |
+ (HEIGHT - 1) << MS3_HEIGHT_SHIFT |
+ (WIDTH - 1) << MS3_WIDTH_SHIFT);
+ *b++ = ((WIDTH-1) << MS4_PITCH_SHIFT);
+
+ *b++ = (_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT));
+ *b++ = ((1 << TEX_COUNT) - 1);
+ *b++ = (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT |
+ FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
+ FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
+ *b++ = (TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT |
+ TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT |
+ 0 << SS3_TEXTUREMAP_INDEX_SHIFT);
+ *b++ = (0x00000000);
+
+ /* render target state */
+ tiling_bits = 0;
+ if (dst_tiling != I915_TILING_NONE)
+ tiling_bits = BUF_3D_TILED_SURFACE;
+ if (dst_tiling == I915_TILING_Y)
+ tiling_bits |= BUF_3D_TILE_WALK_Y;
+ *b++ = (_3DSTATE_BUF_INFO_CMD);
+ *b++ = (BUF_3D_ID_COLOR_BACK | tiling_bits | WIDTH*4);
+ *b = fill_reloc(r++, b-batch, dst,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
+ b++;
+
+ *b++ = (_3DSTATE_DST_BUF_VARS_CMD);
+ *b++ = (COLR_BUF_ARGB8888 |
+ DSTORG_HORT_BIAS(0x8) |
+ DSTORG_VERT_BIAS(0x8));
+
+ /* draw rect is unconditional */
+ *b++ = (_3DSTATE_DRAW_RECT_CMD);
+ *b++ = (0x00000000);
+ *b++ = (0x00000000); /* ymin, xmin */
+ *b++ = (DRAW_YMAX(HEIGHT - 1) |
+ DRAW_XMAX(WIDTH - 1));
+ /* yorig, xorig (relate to color buffer?) */
+ *b++ = (0x00000000);
+
+ /* texfmt */
+ *b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) | 2);
+ *b++ = ((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
+ *b++ = (~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D));
+ *b++ = (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
+ BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
+ BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT |
+ BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT);
+
+ /* pixel shader */
+ *b++ = (_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2));
+ /* decl FS_T0 */
+ *b++ = (D0_DCL |
+ REG_TYPE(FS_T0) << D0_TYPE_SHIFT |
+ REG_NR(FS_T0) << D0_NR_SHIFT |
+ ((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+ *b++ = (0);
+ *b++ = (0);
+ /* decl FS_S0 */
+ *b++ = (D0_DCL |
+ (REG_TYPE(FS_S0) << D0_TYPE_SHIFT) |
+ (REG_NR(FS_S0) << D0_NR_SHIFT) |
+ ((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+ *b++ = (0);
+ *b++ = (0);
+ /* texld(FS_OC, FS_S0, FS_T0) */
+ *b++ = (T0_TEXLD |
+ (REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) |
+ (REG_NR(FS_OC) << T0_DEST_NR_SHIFT) |
+ (REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT));
+ *b++ = ((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) |
+ (REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT));
+ *b++ = (0);
+
+ *b++ = (PRIM3D_RECTLIST | (3*4 - 1));
+ *b++ = pack_float(WIDTH);
+ *b++ = pack_float(HEIGHT);
+ *b++ = pack_float(WIDTH);
+ *b++ = pack_float(HEIGHT);
+
+ *b++ = pack_float(0);
+ *b++ = pack_float(HEIGHT);
+ *b++ = pack_float(0);
+ *b++ = pack_float(HEIGHT);
+
+ *b++ = pack_float(0);
+ *b++ = pack_float(0);
+ *b++ = pack_float(0);
+ *b++ = pack_float(0);
+
+ *b++ = MI_BATCH_BUFFER_END;
+ if ((b - batch) & 1)
+ *b++ = 0;
+
+ igt_assert(b - batch <= 1024);
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, batch, (b-batch)*sizeof(batch[0]));
+
+ igt_assert(r-reloc == 2);
+
+ obj[0].handle = dst;
+ obj[0].relocation_count = 0;
+ obj[0].relocs_ptr = 0;
+ obj[0].alignment = 0;
+ obj[0].offset = 0;
+ obj[0].flags = 0;
+ obj[0].rsvd1 = 0;
+ obj[0].rsvd2 = 0;
+
+ obj[1].handle = src;
+ obj[1].relocation_count = 0;
+ obj[1].relocs_ptr = 0;
+ obj[1].alignment = 0;
+ obj[1].offset = 0;
+ obj[1].flags = 0;
+ obj[1].rsvd1 = 0;
+ obj[1].rsvd2 = 0;
+
+ obj[2].handle = handle;
+ obj[2].relocation_count = 2;
+ obj[2].relocs_ptr = (uintptr_t)reloc;
+ obj[2].alignment = 0;
+ obj[2].offset = 0;
+ obj[2].flags = 0;
+ obj[2].rsvd1 = obj[2].rsvd2 = 0;
+
+ exec.buffers_ptr = (uintptr_t)obj;
+ exec.buffer_count = 3;
+ exec.batch_start_offset = 0;
+ exec.batch_len = (b-batch)*sizeof(batch[0]);
+ exec.DR1 = exec.DR4 = 0;
+ exec.num_cliprects = 0;
+ exec.cliprects_ptr = 0;
+ exec.flags = 0;
+ i915_execbuffer2_set_context_id(exec, 0);
+ exec.rsvd2 = 0;
+
+ gem_execbuf(fd, &exec);
+
+ gem_close(fd, handle);
+}
+
+static uint32_t
+create_bo(int fd, uint32_t val, int tiling)
+{
+ uint32_t handle;
+ uint32_t *v;
+ int i;
+
+ handle = gem_create(fd, WIDTH*HEIGHT*4);
+ gem_set_tiling(fd, handle, tiling, WIDTH*4);
+
+ /* Fill the BO with dwords starting at val */
+ v = gem_mmap__gtt(fd, handle, WIDTH * HEIGHT * 4,
+ PROT_READ | PROT_WRITE);
+ for (i = 0; i < WIDTH*HEIGHT; i++)
+ v[i] = val++;
+ munmap(v, WIDTH*HEIGHT*4);
+
+ return handle;
+}
+
+static void
+check_bo(int fd, uint32_t handle, uint32_t val)
+{
+ uint32_t *v;
+ int i;
+
+ v = gem_mmap__gtt(fd, handle, WIDTH * HEIGHT * 4, PROT_READ);
+ for (i = 0; i < WIDTH*HEIGHT; i++) {
+ igt_assert_f(v[i] == val,
+ "Expected 0x%08x, found 0x%08x "
+ "at offset 0x%08x\n",
+ val, v[i], i * 4);
+ val++;
+ }
+ munmap(v, WIDTH*HEIGHT*4);
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t *handle, *tiling, *start_val;
+ uint32_t start = 0;
+ int i, fd, count;
+
+ igt_simple_init(argc, argv);
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ igt_require(IS_GEN3(intel_get_drm_devid(fd)));
+
+ count = 0;
+ if (argc > 1)
+ count = atoi(argv[1]);
+ if (count == 0)
+ count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
+ igt_info("Using %d 1MiB buffers\n", count);
+ intel_require_memory(count, 1024*1024, CHECK_RAM);
+
+ handle = malloc(sizeof(uint32_t)*count*3);
+ tiling = handle + count;
+ start_val = tiling + count;
+
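+ /* cycle the buffers through linear, X and Y tiling (0, 1, 2) */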
+ for (i = 0; i < count; i++) {
+ handle[i] = create_bo(fd, start, tiling[i] = i % 3);
+ start_val[i] = start;
+ start += 1024 * 1024 / 4;
+ }
+
+ igt_info("Verifying initialisation..."); fflush(stdout);
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+ igt_info("done\n");
+
+ igt_info("Cyclic blits, forward..."); fflush(stdout);
+ for (i = 0; i < count * 32; i++) {
+ int src = i % count;
+ int dst = (i + 1) % count;
+
+ copy(fd, handle[dst], tiling[dst], handle[src], tiling[src]);
+ start_val[dst] = start_val[src];
+ }
+ igt_info("verifying..."); fflush(stdout);
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+ igt_info("done\n");
+
+ igt_info("Cyclic blits, backward..."); fflush(stdout);
+ for (i = 0; i < count * 32; i++) {
+ int src = (i + 1) % count;
+ int dst = i % count;
+
+ copy(fd, handle[dst], tiling[dst], handle[src], tiling[src]);
+ start_val[dst] = start_val[src];
+ }
+ igt_info("verifying..."); fflush(stdout);
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+ igt_info("done\n");
+
+ igt_info("Random blits..."); fflush(stdout);
+ for (i = 0; i < count * 32; i++) {
+ int src = random() % count;
+ int dst = random() % count;
+
+ while (src == dst)
+ dst = random() % count;
+
+ copy(fd, handle[dst], tiling[dst], handle[src], tiling[src]);
+ start_val[dst] = start_val[src];
+ }
+ igt_info("verifying..."); fflush(stdout);
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+ igt_info("done\n");
+
+ igt_exit();
+}
diff --git a/tests/i915/gen3_render_tiledx_blits.c b/tests/i915/gen3_render_tiledx_blits.c
new file mode 100644
index 00000000..e6246f2b
--- /dev/null
+++ b/tests/i915/gen3_render_tiledx_blits.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/** @file gen3_render_tiledx_blits.c
+ *
+ * This is a test of doing many blits between X-tiled buffers, with a
+ * working set larger than the aperture size.
+ *
+ * The goal is simply to ensure the basics work.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+#include "i915_reg.h"
+
+#define WIDTH 512
+#define HEIGHT 512
+
+static inline uint32_t pack_float(float f)
+{
+ union {
+ uint32_t dw;
+ float f;
+ } u;
+ u.f = f;
+ return u.dw;
+}
+
+static uint32_t fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
+ uint32_t offset,
+ uint32_t handle,
+ uint32_t read_domain,
+ uint32_t write_domain)
+{
+ reloc->target_handle = handle;
+ reloc->delta = 0;
+ reloc->offset = offset * sizeof(uint32_t);
+ reloc->presumed_offset = 0;
+ reloc->read_domains = read_domain;
+ reloc->write_domain = write_domain;
+
+ return reloc->presumed_offset + reloc->delta;
+}
+
+static void
+copy(int fd, uint32_t dst, uint32_t src)
+{
+ uint32_t batch[1024], *b = batch;
+ struct drm_i915_gem_relocation_entry reloc[2], *r = reloc;
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_execbuffer2 exec;
+ uint32_t handle;
+
+ /* invariant state */
+ *b++ = (_3DSTATE_AA_CMD |
+ AA_LINE_ECAAR_WIDTH_ENABLE |
+ AA_LINE_ECAAR_WIDTH_1_0 |
+ AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
+ *b++ = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
+ IAB_MODIFY_ENABLE |
+ IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) |
+ IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE <<
+ IAB_SRC_FACTOR_SHIFT) |
+ IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO <<
+ IAB_DST_FACTOR_SHIFT));
+ *b++ = (_3DSTATE_DFLT_DIFFUSE_CMD);
+ *b++ = (0);
+ *b++ = (_3DSTATE_DFLT_SPEC_CMD);
+ *b++ = (0);
+ *b++ = (_3DSTATE_DFLT_Z_CMD);
+ *b++ = (0);
+ *b++ = (_3DSTATE_COORD_SET_BINDINGS |
+ CSB_TCB(0, 0) |
+ CSB_TCB(1, 1) |
+ CSB_TCB(2, 2) |
+ CSB_TCB(3, 3) |
+ CSB_TCB(4, 4) |
+ CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
+ *b++ = (_3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
+ *b++ = (_3DSTATE_MODES_4_CMD |
+ ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
+ ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
+ ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
+ *b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
+ *b++ = (0x00000000); /* Disable texture coordinate wrap-shortest */
+ *b++ = ((1 << S4_POINT_WIDTH_SHIFT) |
+ S4_LINE_WIDTH_ONE |
+ S4_CULLMODE_NONE |
+ S4_VFMT_XY);
+ *b++ = (0x00000000); /* Stencil. */
+ *b++ = (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+ *b++ = (_3DSTATE_SCISSOR_RECT_0_CMD);
+ *b++ = (0);
+ *b++ = (0);
+ *b++ = (_3DSTATE_DEPTH_SUBRECT_DISABLE);
+ *b++ = (_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */
+ *b++ = (0);
+ *b++ = (_3DSTATE_STIPPLE);
+ *b++ = (0x00000000);
+ *b++ = (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
+
+ /* sampler state */
+#define TEX_COUNT 1
+ *b++ = (_3DSTATE_MAP_STATE | (3 * TEX_COUNT));
+ *b++ = ((1 << TEX_COUNT) - 1);
+ *b = fill_reloc(r++, b-batch, src, I915_GEM_DOMAIN_SAMPLER, 0); b++;
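+ /* X-tiled source map: tiled surface bit set, no Y-major walk */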
+ *b++ = (MAPSURF_32BIT | MT_32BIT_ARGB8888 |
+ MS3_TILED_SURFACE |
+ (HEIGHT - 1) << MS3_HEIGHT_SHIFT |
+ (WIDTH - 1) << MS3_WIDTH_SHIFT);
+ *b++ = ((WIDTH-1) << MS4_PITCH_SHIFT);
+
+ *b++ = (_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT));
+ *b++ = ((1 << TEX_COUNT) - 1);
+ *b++ = (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT |
+ FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
+ FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
+ *b++ = (TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT |
+ TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT |
+ 0 << SS3_TEXTUREMAP_INDEX_SHIFT);
+ *b++ = (0x00000000);
+
+ /* render target state */
+ *b++ = (_3DSTATE_BUF_INFO_CMD);
+ *b++ = (BUF_3D_ID_COLOR_BACK | BUF_3D_TILED_SURFACE | WIDTH*4);
+ *b = fill_reloc(r++, b-batch, dst,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
+ b++;
+
+ *b++ = (_3DSTATE_DST_BUF_VARS_CMD);
+ *b++ = (COLR_BUF_ARGB8888 |
+ DSTORG_HORT_BIAS(0x8) |
+ DSTORG_VERT_BIAS(0x8));
+
+ /* draw rect is unconditional */
+ *b++ = (_3DSTATE_DRAW_RECT_CMD);
+ *b++ = (0x00000000);
+ *b++ = (0x00000000); /* ymin, xmin */
+ *b++ = (DRAW_YMAX(HEIGHT - 1) |
+ DRAW_XMAX(WIDTH - 1));
+ /* yorig, xorig (relate to color buffer?) */
+ *b++ = (0x00000000);
+
+ /* texfmt */
+ *b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) | 2);
+ *b++ = ((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
+ *b++ = (~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D));
+ *b++ = (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
+ BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
+ BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT |
+ BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT);
+
+ /* pixel shader */
+ *b++ = (_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2));
+ /* decl FS_T0 */
+ *b++ = (D0_DCL |
+ REG_TYPE(FS_T0) << D0_TYPE_SHIFT |
+ REG_NR(FS_T0) << D0_NR_SHIFT |
+ ((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+ *b++ = (0);
+ *b++ = (0);
+ /* decl FS_S0 */
+ *b++ = (D0_DCL |
+ (REG_TYPE(FS_S0) << D0_TYPE_SHIFT) |
+ (REG_NR(FS_S0) << D0_NR_SHIFT) |
+ ((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+ *b++ = (0);
+ *b++ = (0);
+ /* texld(FS_OC, FS_S0, FS_T0) */
+ *b++ = (T0_TEXLD |
+ (REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) |
+ (REG_NR(FS_OC) << T0_DEST_NR_SHIFT) |
+ (REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT));
+ *b++ = ((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) |
+ (REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT));
+ *b++ = (0);
+
+ *b++ = (PRIM3D_RECTLIST | (3*4 - 1));
+ *b++ = pack_float(WIDTH);
+ *b++ = pack_float(HEIGHT);
+ *b++ = pack_float(WIDTH);
+ *b++ = pack_float(HEIGHT);
+
+ *b++ = pack_float(0);
+ *b++ = pack_float(HEIGHT);
+ *b++ = pack_float(0);
+ *b++ = pack_float(HEIGHT);
+
+ *b++ = pack_float(0);
+ *b++ = pack_float(0);
+ *b++ = pack_float(0);
+ *b++ = pack_float(0);
+
+ *b++ = MI_BATCH_BUFFER_END;
+ if ((b - batch) & 1)
+ *b++ = 0;
+
+ igt_assert(b - batch <= 1024);
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, batch, (b-batch)*sizeof(batch[0]));
+
+ igt_assert(r-reloc == 2);
+
+ obj[0].handle = dst;
+ obj[0].relocation_count = 0;
+ obj[0].relocs_ptr = 0;
+ obj[0].alignment = 0;
+ obj[0].offset = 0;
+ obj[0].flags = 0;
+ obj[0].rsvd1 = 0;
+ obj[0].rsvd2 = 0;
+
+ obj[1].handle = src;
+ obj[1].relocation_count = 0;
+ obj[1].relocs_ptr = 0;
+ obj[1].alignment = 0;
+ obj[1].offset = 0;
+ obj[1].flags = 0;
+ obj[1].rsvd1 = 0;
+ obj[1].rsvd2 = 0;
+
+ obj[2].handle = handle;
+ obj[2].relocation_count = 2;
+ obj[2].relocs_ptr = (uintptr_t)reloc;
+ obj[2].alignment = 0;
+ obj[2].offset = 0;
+ obj[2].flags = 0;
+ obj[2].rsvd1 = obj[2].rsvd2 = 0;
+
+ exec.buffers_ptr = (uintptr_t)obj;
+ exec.buffer_count = 3;
+ exec.batch_start_offset = 0;
+ exec.batch_len = (b-batch)*sizeof(batch[0]);
+ exec.DR1 = exec.DR4 = 0;
+ exec.num_cliprects = 0;
+ exec.cliprects_ptr = 0;
+ exec.flags = 0;
+ i915_execbuffer2_set_context_id(exec, 0);
+ exec.rsvd2 = 0;
+
+ gem_execbuf(fd, &exec);
+
+ gem_close(fd, handle);
+}
+
+static uint32_t
+create_bo(int fd, uint32_t val)
+{
+ uint32_t handle;
+ uint32_t *v;
+ int i;
+
+ handle = gem_create(fd, WIDTH*HEIGHT*4);
+ gem_set_tiling(fd, handle, I915_TILING_X, WIDTH*4);
+
+ /* Fill the BO with dwords starting at val */
+ v = gem_mmap__gtt(fd, handle, WIDTH * HEIGHT * 4,
+ PROT_READ | PROT_WRITE);
+ for (i = 0; i < WIDTH*HEIGHT; i++)
+ v[i] = val++;
+ munmap(v, WIDTH*HEIGHT*4);
+
+ return handle;
+}
+
+static void
+check_bo(int fd, uint32_t handle, uint32_t val)
+{
+ uint32_t *v;
+ int i;
+
+ v = gem_mmap__gtt(fd, handle, WIDTH * HEIGHT * 4, PROT_READ);
+ for (i = 0; i < WIDTH*HEIGHT; i++) {
+ igt_assert_f(v[i] == val,
+ "Expected 0x%08x, found 0x%08x "
+ "at offset 0x%08x\n",
+ val, v[i], i * 4);
+ val++;
+ }
+ munmap(v, WIDTH*HEIGHT*4);
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t *handle, *start_val;
+ uint32_t start = 0;
+ int i, fd, count;
+
+ igt_simple_init(argc, argv);
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ igt_require(IS_GEN3(intel_get_drm_devid(fd)));
+
+ count = 0;
+ if (argc > 1)
+ count = atoi(argv[1]);
+ if (count == 0)
+ count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
+ igt_info("Using %d 1MiB buffers\n", count);
+ intel_require_memory(count, 1024*1024, CHECK_RAM);
+
+ handle = malloc(sizeof(uint32_t)*count*2);
+ start_val = handle + count;
+
+ for (i = 0; i < count; i++) {
+ handle[i] = create_bo(fd, start);
+ start_val[i] = start;
+ start += 1024 * 1024 / 4;
+ }
+
+ igt_info("Verifying initialisation...\n");
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+
+ igt_info("Cyclic blits, forward...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = i % count;
+ int dst = (i + 1) % count;
+
+ copy(fd, handle[dst], handle[src]);
+ start_val[dst] = start_val[src];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+
+ igt_info("Cyclic blits, backward...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = (i + 1) % count;
+ int dst = i % count;
+
+ copy(fd, handle[dst], handle[src]);
+ start_val[dst] = start_val[src];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+
+ igt_info("Random blits...\n");
+ for (i = 0; i < count * 4; i++) {
+ int src = random() % count;
+ int dst = random() % count;
+
+ if (src == dst)
+ continue;
+
+ copy(fd, handle[dst], handle[src]);
+ start_val[dst] = start_val[src];
+ }
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+
+ igt_exit();
+}
diff --git a/tests/i915/gen3_render_tiledy_blits.c b/tests/i915/gen3_render_tiledy_blits.c
new file mode 100644
index 00000000..17502ccb
--- /dev/null
+++ b/tests/i915/gen3_render_tiledy_blits.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/** @file gen3_render_tiledy_blits.c
+ *
+ * This is a test of doing many blits between Y-tiled buffers, with a
+ * working set larger than the aperture size.
+ *
+ * The goal is simply to ensure the basics work.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+
+#include "i915_reg.h"
+
+#define WIDTH 512
+#define HEIGHT 512
+
+static inline uint32_t pack_float(float f)
+{
+ union {
+ uint32_t dw;
+ float f;
+ } u;
+ u.f = f;
+ return u.dw;
+}
+
+static uint32_t fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
+ uint32_t offset,
+ uint32_t handle,
+ uint32_t read_domain,
+ uint32_t write_domain)
+{
+ reloc->target_handle = handle;
+ reloc->delta = 0;
+ reloc->offset = offset * sizeof(uint32_t);
+ reloc->presumed_offset = 0;
+ reloc->read_domains = read_domain;
+ reloc->write_domain = write_domain;
+
+ return reloc->presumed_offset + reloc->delta;
+}
+
+static void
+copy(int fd, uint32_t dst, uint32_t src)
+{
+ uint32_t batch[1024], *b = batch;
+ struct drm_i915_gem_relocation_entry reloc[2], *r = reloc;
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_execbuffer2 exec;
+ uint32_t handle;
+
+ /* invariant state */
+ *b++ = (_3DSTATE_AA_CMD |
+ AA_LINE_ECAAR_WIDTH_ENABLE |
+ AA_LINE_ECAAR_WIDTH_1_0 |
+ AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
+ *b++ = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
+ IAB_MODIFY_ENABLE |
+ IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) |
+ IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE <<
+ IAB_SRC_FACTOR_SHIFT) |
+ IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO <<
+ IAB_DST_FACTOR_SHIFT));
+ *b++ = (_3DSTATE_DFLT_DIFFUSE_CMD);
+ *b++ = (0);
+ *b++ = (_3DSTATE_DFLT_SPEC_CMD);
+ *b++ = (0);
+ *b++ = (_3DSTATE_DFLT_Z_CMD);
+ *b++ = (0);
+ *b++ = (_3DSTATE_COORD_SET_BINDINGS |
+ CSB_TCB(0, 0) |
+ CSB_TCB(1, 1) |
+ CSB_TCB(2, 2) |
+ CSB_TCB(3, 3) |
+ CSB_TCB(4, 4) |
+ CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
+ *b++ = (_3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
+ *b++ = (_3DSTATE_MODES_4_CMD |
+ ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
+ ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
+ ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
+ *b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
+ *b++ = (0x00000000); /* Disable texture coordinate wrap-shortest */
+ *b++ = ((1 << S4_POINT_WIDTH_SHIFT) |
+ S4_LINE_WIDTH_ONE |
+ S4_CULLMODE_NONE |
+ S4_VFMT_XY);
+ *b++ = (0x00000000); /* Stencil. */
+ *b++ = (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+ *b++ = (_3DSTATE_SCISSOR_RECT_0_CMD);
+ *b++ = (0);
+ *b++ = (0);
+ *b++ = (_3DSTATE_DEPTH_SUBRECT_DISABLE);
+ *b++ = (_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */
+ *b++ = (0);
+ *b++ = (_3DSTATE_STIPPLE);
+ *b++ = (0x00000000);
+ *b++ = (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
+
+ /* sampler state */
+#define TEX_COUNT 1
+ *b++ = (_3DSTATE_MAP_STATE | (3 * TEX_COUNT));
+ *b++ = ((1 << TEX_COUNT) - 1);
+ *b = fill_reloc(r++, b-batch, src, I915_GEM_DOMAIN_SAMPLER, 0); b++;
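+ /* Y-tiled source map: tiled surface plus the Y-major tile walk */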
+ *b++ = (MAPSURF_32BIT | MT_32BIT_ARGB8888 |
+ MS3_TILED_SURFACE | MS3_TILE_WALK |
+ (HEIGHT - 1) << MS3_HEIGHT_SHIFT |
+ (WIDTH - 1) << MS3_WIDTH_SHIFT);
+ *b++ = ((WIDTH-1) << MS4_PITCH_SHIFT);
+
+ *b++ = (_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT));
+ *b++ = ((1 << TEX_COUNT) - 1);
+ *b++ = (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT |
+ FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
+ FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
+ *b++ = (TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT |
+ TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT |
+ 0 << SS3_TEXTUREMAP_INDEX_SHIFT);
+ *b++ = (0x00000000);
+
+ /* render target state */
+ *b++ = (_3DSTATE_BUF_INFO_CMD);
+ *b++ = (BUF_3D_ID_COLOR_BACK | BUF_3D_TILED_SURFACE | BUF_3D_TILE_WALK_Y | WIDTH*4);
+ *b = fill_reloc(r++, b-batch, dst,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
+ b++;
+
+ *b++ = (_3DSTATE_DST_BUF_VARS_CMD);
+ *b++ = (COLR_BUF_ARGB8888 |
+ DSTORG_HORT_BIAS(0x8) |
+ DSTORG_VERT_BIAS(0x8));
+
+ /* draw rect is unconditional */
+ *b++ = (_3DSTATE_DRAW_RECT_CMD);
+ *b++ = (0x00000000);
+ *b++ = (0x00000000); /* ymin, xmin */
+ *b++ = (DRAW_YMAX(HEIGHT - 1) |
+ DRAW_XMAX(WIDTH - 1));
+ /* yorig, xorig (relate to color buffer?) */
+ *b++ = (0x00000000);
+
+ /* texfmt */
+ *b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) | 2);
+ *b++ = ((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
+ *b++ = (~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D));
+ *b++ = (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
+ BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
+ BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT |
+ BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT);
+
+ /* pixel shader */
+ *b++ = (_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2));
+ /* decl FS_T0 */
+ *b++ = (D0_DCL |
+ REG_TYPE(FS_T0) << D0_TYPE_SHIFT |
+ REG_NR(FS_T0) << D0_NR_SHIFT |
+ ((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+ *b++ = (0);
+ *b++ = (0);
+ /* decl FS_S0 */
+ *b++ = (D0_DCL |
+ (REG_TYPE(FS_S0) << D0_TYPE_SHIFT) |
+ (REG_NR(FS_S0) << D0_NR_SHIFT) |
+ ((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+ *b++ = (0);
+ *b++ = (0);
+ /* texld(FS_OC, FS_S0, FS_T0) */
+ *b++ = (T0_TEXLD |
+ (REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) |
+ (REG_NR(FS_OC) << T0_DEST_NR_SHIFT) |
+ (REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT));
+ *b++ = ((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) |
+ (REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT));
+ *b++ = (0);
+
+ *b++ = (PRIM3D_RECTLIST | (3*4 - 1));
+ *b++ = pack_float(WIDTH);
+ *b++ = pack_float(HEIGHT);
+ *b++ = pack_float(WIDTH);
+ *b++ = pack_float(HEIGHT);
+
+ *b++ = pack_float(0);
+ *b++ = pack_float(HEIGHT);
+ *b++ = pack_float(0);
+ *b++ = pack_float(HEIGHT);
+
+ *b++ = pack_float(0);
+ *b++ = pack_float(0);
+ *b++ = pack_float(0);
+ *b++ = pack_float(0);
+
+ *b++ = MI_BATCH_BUFFER_END;
+ if ((b - batch) & 1)
+ *b++ = 0;
+
+ igt_assert(b - batch <= 1024);
+ handle = gem_create(fd, 4096);
+ gem_write(fd, handle, 0, batch, (b-batch)*sizeof(batch[0]));
+
+ igt_assert(r-reloc == 2);
+
+ obj[0].handle = dst;
+ obj[0].relocation_count = 0;
+ obj[0].relocs_ptr = 0;
+ obj[0].alignment = 0;
+ obj[0].offset = 0;
+ obj[0].flags = 0;
+ obj[0].rsvd1 = 0;
+ obj[0].rsvd2 = 0;
+
+ obj[1].handle = src;
+ obj[1].relocation_count = 0;
+ obj[1].relocs_ptr = 0;
+ obj[1].alignment = 0;
+ obj[1].offset = 0;
+ obj[1].flags = 0;
+ obj[1].rsvd1 = 0;
+ obj[1].rsvd2 = 0;
+
+ obj[2].handle = handle;
+ obj[2].relocation_count = 2;
+ obj[2].relocs_ptr = (uintptr_t)reloc;
+ obj[2].alignment = 0;
+ obj[2].offset = 0;
+ obj[2].flags = 0;
+ obj[2].rsvd1 = obj[2].rsvd2 = 0;
+
+ exec.buffers_ptr = (uintptr_t)obj;
+ exec.buffer_count = 3;
+ exec.batch_start_offset = 0;
+ exec.batch_len = (b-batch)*sizeof(batch[0]);
+ exec.DR1 = exec.DR4 = 0;
+ exec.num_cliprects = 0;
+ exec.cliprects_ptr = 0;
+ exec.flags = 0;
+ i915_execbuffer2_set_context_id(exec, 0);
+ exec.rsvd2 = 0;
+
+ gem_execbuf(fd, &exec);
+
+ gem_close(fd, handle);
+}
+
+static uint32_t
+create_bo(int fd, uint32_t val)
+{
+ uint32_t handle;
+ uint32_t *v;
+ int i;
+
+ handle = gem_create(fd, WIDTH*HEIGHT*4);
+ gem_set_tiling(fd, handle, I915_TILING_Y, WIDTH*4);
+
+ /* Fill the BO with dwords starting at val */
+ v = gem_mmap__gtt(fd, handle, WIDTH * HEIGHT * 4,
+ PROT_READ | PROT_WRITE);
+ for (i = 0; i < WIDTH*HEIGHT; i++)
+ v[i] = val++;
+ munmap(v, WIDTH*HEIGHT*4);
+
+ return handle;
+}
+
+static void
+check_bo(int fd, uint32_t handle, uint32_t val)
+{
+ uint32_t *v;
+ int i;
+
+ v = gem_mmap__gtt(fd, handle, WIDTH * HEIGHT * 4, PROT_READ);
+ for (i = 0; i < WIDTH*HEIGHT; i++) {
+ igt_assert_f(v[i] == val,
+ "Expected 0x%08x, found 0x%08x "
+ "at offset 0x%08x\n",
+ val, v[i], i * 4);
+ val++;
+ }
+ munmap(v, WIDTH*HEIGHT*4);
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t *handle, *start_val;
+ uint32_t start = 0;
+ int i, fd, count;
+
+ igt_simple_init(argc, argv);
+
+ fd = drm_open_driver(DRIVER_INTEL);
+
+ igt_require(IS_GEN3(intel_get_drm_devid(fd)));
+
+ count = 0;
+ if (argc > 1)
+ count = atoi(argv[1]);
+ if (count == 0)
+ count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
+ igt_info("Using %d 1MiB buffers\n", count);
+ intel_require_memory(count, 1024*1024, CHECK_RAM);
+
+ handle = malloc(sizeof(uint32_t)*count*2);
+ start_val = handle + count;
+
+ for (i = 0; i < count; i++) {
+ handle[i] = create_bo(fd, start);
+ start_val[i] = start;
+ start += 1024 * 1024 / 4;
+ }
+
+ igt_info("Verifying initialisation..."); fflush(stdout);
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+ igt_info("done\n");
+
+ igt_info("Cyclic blits, forward..."); fflush(stdout);
+ for (i = 0; i < count * 32; i++) {
+ int src = i % count;
+ int dst = (i + 1) % count;
+
+ copy(fd, handle[dst], handle[src]);
+ start_val[dst] = start_val[src];
+ }
+ igt_info("verifying..."); fflush(stdout);
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+ igt_info("done\n");
+
+ igt_info("Cyclic blits, backward..."); fflush(stdout);
+ for (i = 0; i < count * 32; i++) {
+ int src = (i + 1) % count;
+ int dst = i % count;
+
+ copy(fd, handle[dst], handle[src]);
+ start_val[dst] = start_val[src];
+ }
+ igt_info("verifying..."); fflush(stdout);
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+ igt_info("done\n");
+
+ igt_info("Random blits..."); fflush(stdout);
+ for (i = 0; i < count * 32; i++) {
+ int src = random() % count;
+ int dst = random() % count;
+
+ while (src == dst)
+ dst = random() % count;
+
+ copy(fd, handle[dst], handle[src]);
+ start_val[dst] = start_val[src];
+ }
+ igt_info("verifying..."); fflush(stdout);
+ for (i = 0; i < count; i++)
+ check_bo(fd, handle[i], start_val[i]);
+ igt_info("done\n");
+
+ igt_exit();
+}
diff --git a/tests/i915/query.c b/tests/i915/query.c
new file mode 100644
index 00000000..08aabf94
--- /dev/null
+++ b/tests/i915/query.c
@@ -0,0 +1,528 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+
+#include <limits.h>
+
+IGT_TEST_DESCRIPTION("Testing the i915 query uAPI.");
+
+/*
+ * We should get at least 3 bytes of data: one byte each for the slice,
+ * subslice and EU masks.
+ */
+#define MIN_TOPOLOGY_ITEM_SIZE (sizeof(struct drm_i915_query_topology_info) + 3)
+
+static int
+__i915_query(int fd, struct drm_i915_query *q)
+{
+ if (igt_ioctl(fd, DRM_IOCTL_I915_QUERY, q))
+ return -errno;
+ return 0;
+}
+
+static int
+__i915_query_items(int fd, struct drm_i915_query_item *items, uint32_t n_items)
+{
+ struct drm_i915_query q = {
+ .num_items = n_items,
+ .items_ptr = to_user_pointer(items),
+ };
+ return __i915_query(fd, &q);
+}
+
+#define i915_query_items(fd, items, n_items) do { \
+ igt_assert_eq(__i915_query_items(fd, items, n_items), 0); \
+ errno = 0; \
+ } while (0)
+#define i915_query_items_err(fd, items, n_items, err) do { \
+ igt_assert_eq(__i915_query_items(fd, items, n_items), -err); \
+ } while (0)
+
+static bool has_query_supports(int fd)
+{
+ struct drm_i915_query query = {};
+
+ return __i915_query(fd, &query) == 0;
+}
+
+static void test_query_garbage(int fd)
+{
+ struct drm_i915_query query;
+ struct drm_i915_query_item item;
+
+ /* Verify that invalid query pointers are rejected. */
+ igt_assert_eq(__i915_query(fd, NULL), -EFAULT);
+ igt_assert_eq(__i915_query(fd, (void *) -1), -EFAULT);
+
+ /*
+ * The query flags field is currently valid only if it equals 0. This
+ * might change in the future.
+ */
+ memset(&query, 0, sizeof(query));
+ query.flags = 42;
+ igt_assert_eq(__i915_query(fd, &query), -EINVAL);
+
+ /* Test a couple of invalid pointers. */
+ i915_query_items_err(fd, (void *) ULONG_MAX, 1, EFAULT);
+ i915_query_items_err(fd, (void *) 0, 1, EFAULT);
+
+ /* Test the invalid query id = 0. */
+ memset(&item, 0, sizeof(item));
+ i915_query_items_err(fd, &item, 1, EINVAL);
+}
+
+static void test_query_garbage_items(int fd)
+{
+ struct drm_i915_query_item items[2];
+ struct drm_i915_query_item *items_ptr;
+ int i, n_items;
+
+ /*
+ * The query item flags field is currently valid only if it equals 0.
+ * Subject to change in the future.
+ */
+ memset(items, 0, sizeof(items));
+ items[0].query_id = DRM_I915_QUERY_TOPOLOGY_INFO;
+ items[0].flags = 42;
+ i915_query_items(fd, items, 1);
+ igt_assert_eq(items[0].length, -EINVAL);
+
+ /*
+ * Test an invalid query id in the second item and verify that the first
+ * one is properly processed.
+ */
+ memset(items, 0, sizeof(items));
+ items[0].query_id = DRM_I915_QUERY_TOPOLOGY_INFO;
+ items[1].query_id = ULONG_MAX;
+ i915_query_items(fd, items, 2);
+ igt_assert_lte(MIN_TOPOLOGY_ITEM_SIZE, items[0].length);
+ igt_assert_eq(items[1].length, -EINVAL);
+
+ /*
+ * Test an invalid query id in the first item and verify that the second
+ * one is properly processed (the driver is expected to go through them
+ * all and place error codes in the failed items).
+ */
+ memset(items, 0, sizeof(items));
+ items[0].query_id = ULONG_MAX;
+ items[1].query_id = DRM_I915_QUERY_TOPOLOGY_INFO;
+ i915_query_items(fd, items, 2);
+ igt_assert_eq(items[0].length, -EINVAL);
+ igt_assert_lte(MIN_TOPOLOGY_ITEM_SIZE, items[1].length);
+
+ /* Test a couple of invalid data pointers in the query item. */
+ memset(items, 0, sizeof(items));
+ items[0].query_id = DRM_I915_QUERY_TOPOLOGY_INFO;
+ i915_query_items(fd, items, 1);
+ igt_assert_lte(MIN_TOPOLOGY_ITEM_SIZE, items[0].length);
+
+ items[0].data_ptr = 0;
+ i915_query_items(fd, items, 1);
+ igt_assert_eq(items[0].length, -EFAULT);
+
+ items[0].data_ptr = ULONG_MAX;
+ i915_query_items(fd, items, 1);
+ igt_assert_eq(items[0].length, -EFAULT);
+
+ /* Test an invalid query item length. */
+ memset(items, 0, sizeof(items));
+ items[0].query_id = DRM_I915_QUERY_TOPOLOGY_INFO;
+ items[1].query_id = DRM_I915_QUERY_TOPOLOGY_INFO;
+ items[1].length = sizeof(struct drm_i915_query_topology_info) - 1;
+ i915_query_items(fd, items, 2);
+ igt_assert_lte(MIN_TOPOLOGY_ITEM_SIZE, items[0].length);
+ igt_assert_eq(items[1].length, -EINVAL);
+
+ /*
+ * Map memory for a query item into which the kernel is going to write
+ * the length of the item on the first ioctl(). Then unmap that memory
+ * and verify that the kernel correctly returns EFAULT, as the memory
+ * backing the item has been removed from our address space.
+ */
+ items_ptr = mmap(0, 4096, PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ items_ptr[0].query_id = DRM_I915_QUERY_TOPOLOGY_INFO;
+ i915_query_items(fd, items_ptr, 1);
+ igt_assert_lte(MIN_TOPOLOGY_ITEM_SIZE, items_ptr[0].length);
+ munmap(items_ptr, 4096);
+ i915_query_items_err(fd, items_ptr, 1, EFAULT);
+
+ /*
+ * Map memory for a query item, then make it read only and verify that
+ * the kernel errors out with EFAULT.
+ */
+ items_ptr = mmap(0, 4096, PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ items_ptr[0].query_id = DRM_I915_QUERY_TOPOLOGY_INFO;
+ igt_assert_eq(0, mprotect(items_ptr, 4096, PROT_READ));
+ i915_query_items_err(fd, items_ptr, 1, EFAULT);
+ munmap(items_ptr, 4096);
+
+ /*
+ * Allocate 2 pages, prepare those 2 pages with valid query items, then
+ * switch the second page to read only and expect an EFAULT error.
+ */
+ items_ptr = mmap(0, 8192, PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ memset(items_ptr, 0, 8192);
+ n_items = 8192 / sizeof(struct drm_i915_query_item);
+ for (i = 0; i < n_items; i++)
+ items_ptr[i].query_id = DRM_I915_QUERY_TOPOLOGY_INFO;
+ mprotect(((uint8_t *)items_ptr) + 4096, 4096, PROT_READ);
+ i915_query_items_err(fd, items_ptr, n_items, EFAULT);
+ munmap(items_ptr, 8192);
+}
+
+/*
+ * Allocate extra memory on both sides of where the kernel is going to write
+ * and verify that it writes only where it's supposed to.
+ */
+static void test_query_topology_kernel_writes(int fd)
+{
+ struct drm_i915_query_item item;
+ struct drm_i915_query_topology_info *topo_info;
+ uint8_t *_topo_info;
+ int b, total_size;
+
+ memset(&item, 0, sizeof(item));
+ item.query_id = DRM_I915_QUERY_TOPOLOGY_INFO;
+ i915_query_items(fd, &item, 1);
+ igt_assert_lte(MIN_TOPOLOGY_ITEM_SIZE, item.length);
+
+ total_size = item.length + 2 * sizeof(*_topo_info);
+ _topo_info = malloc(total_size);
+ memset(_topo_info, 0xff, total_size);
+ topo_info = (struct drm_i915_query_topology_info *) (_topo_info + sizeof(*_topo_info));
+ memset(topo_info, 0, item.length);
+
+ item.data_ptr = to_user_pointer(topo_info);
+ i915_query_items(fd, &item, 1);
+
+ for (b = 0; b < sizeof(*_topo_info); b++) {
+ igt_assert_eq(_topo_info[b], 0xff);
+ igt_assert_eq(_topo_info[sizeof(*_topo_info) + item.length + b], 0xff);
+ }
+}
+
+static bool query_topology_supported(int fd)
+{
+ struct drm_i915_query_item item = {
+ .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
+ };
+
+ return __i915_query_items(fd, &item, 1) == 0 && item.length > 0;
+}
+
+static void test_query_topology_unsupported(int fd)
+{
+ struct drm_i915_query_item item = {
+ .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
+ };
+
+ i915_query_items(fd, &item, 1);
+ igt_assert_eq(item.length, -ENODEV);
+}
+
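+/*
+ * Layout of the topology data blob, as consumed by the helpers below: the
+ * slice mask comes first, the per-slice subslice masks start at
+ * subslice_offset (subslice_stride bytes per slice), and the per-subslice
+ * EU masks start at eu_offset (eu_stride bytes per (slice, subslice) pair).
+ */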
+static bool
+slice_available(const struct drm_i915_query_topology_info *topo_info,
+ int s)
+{
+ return (topo_info->data[s / 8] >> (s % 8)) & 1;
+}
+
+static bool
+subslice_available(const struct drm_i915_query_topology_info *topo_info,
+ int s, int ss)
+{
+ return (topo_info->data[topo_info->subslice_offset +
+ s * topo_info->subslice_stride +
+ ss / 8] >> (ss % 8)) & 1;
+}
+
+static bool
+eu_available(const struct drm_i915_query_topology_info *topo_info,
+ int s, int ss, int eu)
+{
+ return (topo_info->data[topo_info->eu_offset +
+ (s * topo_info->max_subslices + ss) * topo_info->eu_stride +
+ eu / 8] >> (eu % 8)) & 1;
+}
+
+/*
+ * Verify that we get coherent values between the legacy getparam slice/subslice
+ * masks and the new topology query.
+ */
+static void
+test_query_topology_coherent_slice_mask(int fd)
+{
+ struct drm_i915_query_item item;
+ struct drm_i915_query_topology_info *topo_info;
+ drm_i915_getparam_t gp;
+ int slice_mask, subslice_mask;
+ int s, topology_slices, topology_subslices_slice0;
+ int32_t first_query_length;
+
+ gp.param = I915_PARAM_SLICE_MASK;
+ gp.value = &slice_mask;
+ igt_skip_on(igt_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) != 0);
+
+ gp.param = I915_PARAM_SUBSLICE_MASK;
+ gp.value = &subslice_mask;
+ igt_skip_on(igt_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) != 0);
+
+ /* Slices */
+ memset(&item, 0, sizeof(item));
+ item.query_id = DRM_I915_QUERY_TOPOLOGY_INFO;
+ i915_query_items(fd, &item, 1);
+ /* We expect at least one byte each for the slice, subslice & EU masks. */
+ igt_assert_lte(MIN_TOPOLOGY_ITEM_SIZE, item.length);
+ first_query_length = item.length;
+
+ topo_info = calloc(1, item.length);
+
+ item.data_ptr = to_user_pointer(topo_info);
+ i915_query_items(fd, &item, 1);
+ /* We should get the same size once the data has been written. */
+ igt_assert_eq(first_query_length, item.length);
+ /* We expect at least one byte each for the slice, subslice & EU masks. */
+ igt_assert_lte(MIN_TOPOLOGY_ITEM_SIZE, item.length);
+
+ topology_slices = 0;
+ for (s = 0; s < topo_info->max_slices; s++) {
+ if (slice_available(topo_info, s))
+ topology_slices |= 1UL << s;
+ }
+
+ igt_debug("slice mask getparam=0x%x / query=0x%x\n",
+ slice_mask, topology_slices);
+
+ /* These 2 should always match. */
+ igt_assert_eq(slice_mask, topology_slices);
+
+ topology_subslices_slice0 = 0;
+ for (s = 0; s < topo_info->max_subslices; s++) {
+ if (subslice_available(topo_info, 0, s))
+ topology_subslices_slice0 |= 1UL << s;
+ }
+
+ igt_debug("subslice mask getparam=0x%x / query=0x%x\n",
+ subslice_mask, topology_subslices_slice0);
+
+ /*
+ * I915_PARAM_SUBSLICE_MASK returns the value for slice0; it should
+ * match the subslice mask of the first slice of the topology.
+ */
+ igt_assert_eq(subslice_mask, topology_subslices_slice0);
+
+ free(topo_info);
+}
+
+/*
+ * Verify that we get the same total number of EUs from getparam and the topology query.
+ */
+static void
+test_query_topology_matches_eu_total(int fd)
+{
+ struct drm_i915_query_item item;
+ struct drm_i915_query_topology_info *topo_info;
+ drm_i915_getparam_t gp;
+ int n_eus, n_eus_topology, s;
+
+ gp.param = I915_PARAM_EU_TOTAL;
+ gp.value = &n_eus;
+ do_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+ igt_debug("n_eus=%i\n", n_eus);
+
+ memset(&item, 0, sizeof(item));
+ item.query_id = DRM_I915_QUERY_TOPOLOGY_INFO;
+ i915_query_items(fd, &item, 1);
+
+ topo_info = calloc(1, item.length);
+
+ item.data_ptr = to_user_pointer(topo_info);
+ i915_query_items(fd, &item, 1);
+
+ igt_debug("max_slices=%hu max_subslices=%hu max_eus_per_subslice=%hu\n",
+ topo_info->max_slices, topo_info->max_subslices,
+ topo_info->max_eus_per_subslice);
+ igt_debug(" subslice_offset=%hu subslice_stride=%hu\n",
+ topo_info->subslice_offset, topo_info->subslice_stride);
+ igt_debug(" eu_offset=%hu eu_stride=%hu\n",
+ topo_info->eu_offset, topo_info->eu_stride);
+
+ n_eus_topology = 0;
+ for (s = 0; s < topo_info->max_slices; s++) {
+ int ss;
+
+ igt_debug("slice%i:\n", s);
+
+ for (ss = 0; ss < topo_info->max_subslices; ss++) {
+ int eu, n_subslice_eus = 0;
+
+ igt_debug("\tsubslice: %i\n", ss);
+
+ igt_debug("\t\teu_mask: 0b");
+ for (eu = 0; eu < topo_info->max_eus_per_subslice; eu++) {
+ uint8_t val = eu_available(topo_info, s, ss,
+ topo_info->max_eus_per_subslice - 1 - eu);
+ igt_debug("%hhi", val);
+ n_subslice_eus += __builtin_popcount(val);
+ n_eus_topology += __builtin_popcount(val);
+ }
+
+ igt_debug(" (%i)\n", n_subslice_eus);
+
+ /* Sanity checks. */
+ if (n_subslice_eus > 0) {
+ igt_assert(slice_available(topo_info, s));
+ igt_assert(subslice_available(topo_info, s, ss));
+ }
+ if (subslice_available(topo_info, s, ss)) {
+ igt_assert(slice_available(topo_info, s));
+ }
+ }
+ }
+
+ free(topo_info);
+
+ igt_assert(n_eus_topology == n_eus);
+}
+
+/*
+ * Verify slice/subslice counts on Gens whose characteristics we know for
+ * sure from the PCI ids.
+ */
+static void
+test_query_topology_known_pci_ids(int fd, int devid)
+{
+ const struct intel_device_info *dev_info = intel_get_device_info(devid);
+ struct drm_i915_query_item item;
+ struct drm_i915_query_topology_info *topo_info;
+ int n_slices = 0, n_subslices = 0;
+ int s, ss;
+
+ /* The GT size on some Broadwell SKUs is not defined; skip those. */
+ igt_skip_on(dev_info->gt == 0);
+
+ memset(&item, 0, sizeof(item));
+ item.query_id = DRM_I915_QUERY_TOPOLOGY_INFO;
+ i915_query_items(fd, &item, 1);
+
+ topo_info = (struct drm_i915_query_topology_info *) calloc(1, item.length);
+
+ item.data_ptr = to_user_pointer(topo_info);
+ i915_query_items(fd, &item, 1);
+
+ for (s = 0; s < topo_info->max_slices; s++) {
+ if (slice_available(topo_info, s))
+ n_slices++;
+
+ for (ss = 0; ss < topo_info->max_subslices; ss++) {
+ if (subslice_available(topo_info, s, ss))
+ n_subslices++;
+ }
+ }
+
+ igt_debug("Platform=%s GT=%u slices=%u subslices=%u\n",
+ dev_info->codename, dev_info->gt, n_slices, n_subslices);
+
+ switch (dev_info->gt) {
+ case 1:
+ igt_assert_eq(n_slices, 1);
+ igt_assert(n_subslices == 2 || n_subslices == 3);
+ break;
+ case 2:
+ igt_assert_eq(n_slices, 1);
+ if (dev_info->is_haswell)
+ igt_assert_eq(n_subslices, 2);
+ else
+ igt_assert_eq(n_subslices, 3);
+ break;
+ case 3:
+ igt_assert_eq(n_slices, 2);
+ if (dev_info->is_haswell)
+ igt_assert_eq(n_subslices, 2 * 2);
+ else
+ igt_assert_eq(n_subslices, 2 * 3);
+ break;
+ case 4:
+ igt_assert_eq(n_slices, 3);
+ igt_assert_eq(n_subslices, 3 * 3);
+ break;
+ default:
+ igt_assert(false);
+ }
+
+ free(topo_info);
+}
+
+igt_main
+{
+ int fd = -1;
+ int devid;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_INTEL);
+ igt_require(has_query_supports(fd));
+ devid = intel_get_drm_devid(fd);
+ }
+
+ igt_subtest("query-garbage")
+ test_query_garbage(fd);
+
+ igt_subtest("query-garbage-items") {
+ igt_require(query_topology_supported(fd));
+ test_query_garbage_items(fd);
+ }
+
+ igt_subtest("query-topology-kernel-writes") {
+ igt_require(query_topology_supported(fd));
+ test_query_topology_kernel_writes(fd);
+ }
+
+ igt_subtest("query-topology-unsupported") {
+ igt_require(!query_topology_supported(fd));
+ test_query_topology_unsupported(fd);
+ }
+
+ igt_subtest("query-topology-coherent-slice-mask") {
+ igt_require(query_topology_supported(fd));
+ test_query_topology_coherent_slice_mask(fd);
+ }
+
+ igt_subtest("query-topology-matches-eu-total") {
+ igt_require(query_topology_supported(fd));
+ test_query_topology_matches_eu_total(fd);
+ }
+
+ igt_subtest("query-topology-known-pci-ids") {
+ igt_require(query_topology_supported(fd));
+ igt_require(IS_HASWELL(devid) || IS_BROADWELL(devid) ||
+ IS_SKYLAKE(devid) || IS_KABYLAKE(devid) ||
+ IS_COFFEELAKE(devid));
+ test_query_topology_known_pci_ids(fd, devid);
+ }
+
+ igt_fixture {
+ close(fd);
+ }
+}