igt/perf_pmu: Tighten semaphore-wait measurement

Record the before/after semaphore-wait values around the sleep to try to reduce the inaccuracy from scheduler delays. Previously, the samples were taken before submitting the batch and then after synchronising its completion. The measurement will then be the total that the semaphore was being sampled, but with the extra syscalls intervening may have drifted from the sleep duration. To further reduce the disparity, wait for the batch to start executing before taking our samples. References: https://bugs.freedesktop.org/show_bug.cgi?id=104013 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
author: Chris Wilson <chris@chris-wilson.co.uk> 2017-12-02 00:14:52 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk> 2017-12-04 18:01:53 +0000
commit: c7b20c950276a41badd994324e1983760e44842b (patch)
tree: 61a6f6ce05c476ea49ab5a0c9016684cdd3d6116 /tests
parent: 36015334f9ee86b997fc5c0deb4964184f87648f (diff)
1 files changed, 32 insertions, 22 deletions
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index cb59bf5d..e872f4e5 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -355,13 +355,13 @@ no_sema(int gem_fd, const struct intel_execution_engine2 *e, bool busy)
 static void
 sema_wait(int gem_fd, const struct intel_execution_engine2 *e)
 {
-	struct drm_i915_gem_relocation_entry reloc = { };
-	struct drm_i915_gem_execbuffer2 eb = { };
-	struct drm_i915_gem_exec_object2 obj[2];
+	struct drm_i915_gem_relocation_entry reloc[2] = {};
+	struct drm_i915_gem_exec_object2 obj[2] = {};
+	struct drm_i915_gem_execbuffer2 eb = {};
 	uint32_t bb_handle, obj_handle;
 	unsigned long slept;
 	uint32_t *obj_ptr;
-	uint32_t batch[6];
+	uint32_t batch[16];
 	uint64_t val[2];
 	int fd;
 
@@ -378,28 +378,35 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e)
 
 	obj_ptr = gem_mmap__wc(gem_fd, obj_handle, 0, 4096, PROT_WRITE);
 
-	batch[0] = MI_SEMAPHORE_WAIT |
+	batch[0] = MI_STORE_DWORD_IMM;
+	batch[1] = sizeof(*obj_ptr);
+	batch[2] = 0;
+	batch[3] = 1;
+	batch[4] = MI_SEMAPHORE_WAIT |
 		   MI_SEMAPHORE_POLL |
 		   MI_SEMAPHORE_SAD_GTE_SDD;
-	batch[1] = 1;
-	batch[2] = 0x0;
-	batch[3] = 0x0;
-	batch[4] = MI_NOOP;
-	batch[5] = MI_BATCH_BUFFER_END;
+	batch[5] = 1;
+	batch[6] = 0x0;
+	batch[7] = 0x0;
+	batch[8] = MI_BATCH_BUFFER_END;
 
 	gem_write(gem_fd, bb_handle, 0, batch, sizeof(batch));
 
-	reloc.target_handle = obj_handle;
-	reloc.offset = 2 * sizeof(uint32_t);
-	reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+	reloc[0].target_handle = obj_handle;
+	reloc[0].offset = 1 * sizeof(uint32_t);
+	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+	reloc[0].delta = sizeof(*obj_ptr);
 
-	memset(obj, 0, sizeof(obj));
+	reloc[1].target_handle = obj_handle;
+	reloc[1].offset = 6 * sizeof(uint32_t);
+	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
 
 	obj[0].handle = obj_handle;
 
 	obj[1].handle = bb_handle;
-	obj[1].relocation_count = 1;
-	obj[1].relocs_ptr = to_user_pointer(&reloc);
+	obj[1].relocation_count = 2;
+	obj[1].relocs_ptr = to_user_pointer(reloc);
 
 	eb.buffer_count = 2;
 	eb.buffers_ptr = to_user_pointer(obj);
@@ -413,18 +420,21 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e)
 
 	fd = open_pmu(I915_PMU_ENGINE_SEMA(e->class, e->instance));
 
-	val[0] = pmu_read_single(fd);
-
 	gem_execbuf(gem_fd, &eb);
+	do { /* wait for the batch to start executing */
+		usleep(5e3);
+	} while (!obj_ptr[1]);
+	usleep(5e3); /* wait for the register sampling */
 
+	val[0] = pmu_read_single(fd);
 	slept = measured_usleep(batch_duration_ns / 1000);
+	val[1] = pmu_read_single(fd);
+	igt_debug("slept %.3fms, sampled %.3fms\n",
+		  slept*1e-6, (val[1] - val[0])*1e-6);
 
-	*obj_ptr = 1;
-
+	obj_ptr[0] = 1;
 	gem_sync(gem_fd, bb_handle);
 
-	val[1] = pmu_read_single(fd);
-
 	munmap(obj_ptr, 4096);
 	gem_close(gem_fd, obj_handle);
 	gem_close(gem_fd, bb_handle);
author	Chris Wilson <chris@chris-wilson.co.uk>	2017-12-02 00:14:52 +0000
committer	Chris Wilson <chris@chris-wilson.co.uk>	2017-12-04 18:01:53 +0000
commit	c7b20c950276a41badd994324e1983760e44842b (patch)
tree	61a6f6ce05c476ea49ab5a0c9016684cdd3d6116 /tests
parent	36015334f9ee86b997fc5c0deb4964184f87648f (diff)