From 741bf7064c467df725c14cc0b3b8b50436f9ee09 Mon Sep 17 00:00:00 2001
From: Arkadiusz Hiler
Date: Thu, 18 Oct 2018 14:06:42 +0300
Subject: tests: Introduce i915 directory

We can already move all the tests with distinct prefixes: gem_, gen3_
and i915_. pm_ and drv_ tests will follow in batches, so we can do the
adjustments in the reporting/filtering layer of the CI system.

v2: Fix test-list.txt generation with meson
v3: Fix docs build (Petri)

Cc: Chris Wilson
Cc: Petri Latvala
Cc: Martin Peres
Signed-off-by: Arkadiusz Hiler
Reviewed-by: Petri Latvala
Tested-by: Petri Latvala
Acked-by: Daniel Vetter
Acked-by: Chris Wilson
---
 tests/i915/gem_exec_latency.c | 721 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 721 insertions(+)
 create mode 100644 tests/i915/gem_exec_latency.c

diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c
new file mode 100644
index 00000000..de16322a
--- /dev/null
+++ b/tests/i915/gem_exec_latency.c
@@ -0,0 +1,721 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sched.h>
+#include <math.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+
+#include "drm.h"
+
+#include "igt_sysfs.h"
+#include "igt_vgem.h"
+#include "igt_dummyload.h"
+#include "igt_stats.h"
+
+#include "i915/gem_ring.h"
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define LOCAL_I915_EXEC_BSD_SHIFT      (13)
+#define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define ENGINE_FLAGS  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+#define CORK 0x1
+#define PREEMPT 0x2
+
+static unsigned int ring_size;
+
+static void
+poll_ring(int fd, unsigned ring, const char *name)
+{
+	const struct igt_spin_factory opts = {
+		.engine = ring,
+		.flags = IGT_SPIN_POLL_RUN | IGT_SPIN_FAST,
+	};
+	struct timespec tv = {};
+	unsigned long cycles;
+	igt_spin_t *spin[2];
+	uint64_t elapsed;
+	uint32_t cmd;
+
+	gem_require_ring(fd, ring);
+	igt_require(gem_can_store_dword(fd, ring));
+
+	spin[0] = __igt_spin_batch_factory(fd, &opts);
+	igt_assert(spin[0]->running);
+	cmd = *spin[0]->batch;
+
+	spin[1] = __igt_spin_batch_factory(fd, &opts);
+	igt_assert(spin[1]->running);
+	igt_assert(cmd == *spin[1]->batch);
+
+	igt_spin_batch_end(spin[0]);
+	while (!READ_ONCE(*spin[1]->running))
+		;
+	igt_assert(!gem_bo_busy(fd, spin[0]->handle));
+
+	cycles = 0;
+	while ((elapsed = igt_nsec_elapsed(&tv)) < 2ull << 30) {
+		unsigned int idx = cycles++ & 1;
+
+		*spin[idx]->batch = cmd;
+		*spin[idx]->running = 0;
+		gem_execbuf(fd, &spin[idx]->execbuf);
+
+		igt_spin_batch_end(spin[!idx]);
+		while (!READ_ONCE(*spin[idx]->running))
+			;
+	}
+
+	igt_info("%s completed %ld cycles: %.3f us\n",
+		 name, cycles, elapsed*1e-3/cycles);
+
+	igt_spin_batch_free(fd, spin[1]);
+	igt_spin_batch_free(fd, spin[0]);
+}
+
+#define RCS_TIMESTAMP (0x2000 + 0x358)
+static void latency_on_ring(int fd,
+			    unsigned ring, const char *name,
+			    unsigned flags)
+{
+	const int gen = intel_gen(intel_get_drm_devid(fd));
+	const int has_64bit_reloc = gen >= 8;
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_execbuffer2 execbuf;
+	IGT_CORK_HANDLE(c);
+	volatile uint32_t *reg;
+	unsigned repeats = ring_size;
+	uint32_t start, end, *map, *results;
+	uint64_t offset;
+	double gpu_latency;
+	int i, j;
+
+	reg = (volatile uint32_t *)((volatile char *)igt_global_mmio + RCS_TIMESTAMP);
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(&obj[1]);
+	execbuf.buffer_count = 2;
+	execbuf.flags = ring;
+	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT;
+
+	memset(obj, 0, sizeof(obj));
+	obj[1].handle = gem_create(fd, 4096);
+	obj[1].flags = EXEC_OBJECT_WRITE;
+	results = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_READ);
+
+	obj[2].handle = gem_create(fd, 64*1024);
+	map = gem_mmap__wc(fd, obj[2].handle, 0, 64*1024, PROT_WRITE);
+	gem_set_domain(fd, obj[2].handle,
+		       I915_GEM_DOMAIN_GTT,
+		       I915_GEM_DOMAIN_GTT);
+	map[0] = MI_BATCH_BUFFER_END;
+	gem_execbuf(fd, &execbuf);
+
+	memset(&reloc, 0, sizeof(reloc));
+	obj[2].relocation_count = 1;
+	obj[2].relocs_ptr = to_user_pointer(&reloc);
+
+	gem_set_domain(fd, obj[2].handle,
+		       I915_GEM_DOMAIN_GTT,
+		       I915_GEM_DOMAIN_GTT);
+
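+	/*
+	 * With HANDLE_LUT, target_handle is an index into the execbuffer
+	 * object array: plugging the cork prepends obj[0], pushing the
+	 * results buffer from slot 0 to slot 1.
+	 */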
+	reloc.target_handle = flags & CORK ? 1 : 0;
+	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+	reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+	reloc.presumed_offset = obj[1].offset;
+
+	for (j = 0; j < repeats; j++) {
+		execbuf.batch_start_offset = 64 * j;
+		reloc.offset =
+			execbuf.batch_start_offset + sizeof(uint32_t);
+		reloc.delta = sizeof(uint32_t) * j;
+
+		offset = reloc.presumed_offset;
+		offset += reloc.delta;
+
+		i = 16 * j;
+		/* MI_STORE_REG_MEM */
+		map[i++] = 0x24 << 23 | 1;
+		if (has_64bit_reloc)
+			map[i-1]++;
+		map[i++] = RCS_TIMESTAMP; /* ring local! */
+		map[i++] = offset;
+		if (has_64bit_reloc)
+			map[i++] = offset >> 32;
+		map[i++] = MI_BATCH_BUFFER_END;
+	}
+
+	if (flags & CORK) {
+		obj[0].handle = igt_cork_plug(&c, fd);
+		execbuf.buffers_ptr = to_user_pointer(&obj[0]);
+		execbuf.buffer_count = 3;
+	}
+
+	start = *reg;
+	for (j = 0; j < repeats; j++) {
+		uint64_t presumed_offset = reloc.presumed_offset;
+
+		execbuf.batch_start_offset = 64 * j;
+		reloc.offset =
+			execbuf.batch_start_offset + sizeof(uint32_t);
+		reloc.delta = sizeof(uint32_t) * j;
+
+		gem_execbuf(fd, &execbuf);
+		igt_assert(reloc.presumed_offset == presumed_offset);
+	}
+	end = *reg;
+	igt_assert(reloc.presumed_offset == obj[1].offset);
+
+	if (flags & CORK)
+		igt_cork_unplug(&c);
+
+	gem_set_domain(fd, obj[1].handle, I915_GEM_DOMAIN_GTT, 0);
+	gpu_latency = (results[repeats-1] - results[0]) / (double)(repeats-1);
+
+	gem_set_domain(fd, obj[2].handle,
+		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+	execbuf.batch_start_offset = 0;
+	for (j = 0; j < repeats - 1; j++) {
+		offset = obj[2].offset;
+		offset += 64 * (j + 1);
+
+		i = 16 * j + (has_64bit_reloc ? 4 : 3);
+		map[i] = MI_BATCH_BUFFER_START;
+		if (gen >= 8) {
+			map[i] |= 1 << 8 | 1;
+			map[i + 1] = offset;
+			map[i + 2] = offset >> 32;
+		} else if (gen >= 6) {
+			map[i] |= 1 << 8;
+			map[i + 1] = offset;
+		} else {
+			map[i] |= 2 << 6;
+			map[i + 1] = offset;
+			if (gen < 4)
+				map[i] |= 1;
+		}
+	}
+	offset = obj[2].offset;
+	gem_execbuf(fd, &execbuf);
+	igt_assert(offset == obj[2].offset);
+
+	gem_set_domain(fd, obj[1].handle, I915_GEM_DOMAIN_GTT, 0);
+	igt_info("%s: dispatch latency: %.2f, execution latency: %.2f (target %.2f)\n",
+		 name,
+		 (end - start) / (double)repeats,
+		 gpu_latency, (results[repeats - 1] - results[0]) / (double)(repeats - 1));
+
+	munmap(map, 64*1024);
+	munmap(results, 4096);
+	if (flags & CORK)
+		gem_close(fd, obj[0].handle);
+	gem_close(fd, obj[1].handle);
+	gem_close(fd, obj[2].handle);
+}
+
+static void latency_from_ring(int fd,
+			      unsigned ring, const char *name,
+			      unsigned flags)
+{
+	const int gen = intel_gen(intel_get_drm_devid(fd));
+	const int has_64bit_reloc = gen >= 8;
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_execbuffer2 execbuf;
+	const unsigned int repeats = ring_size / 2;
+	unsigned int other;
+	uint32_t *map, *results;
+	uint32_t ctx[2] = {};
+	int i, j;
+
+	if (flags & PREEMPT) {
+		ctx[0] = gem_context_create(fd);
+		gem_context_set_priority(fd, ctx[0], -1023);
+
+		ctx[1] = gem_context_create(fd);
+		gem_context_set_priority(fd, ctx[1], 1023);
+	}
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(&obj[1]);
+	execbuf.buffer_count = 2;
+	execbuf.flags = ring;
+	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT;
+	execbuf.rsvd1 = ctx[1];
+
+	memset(obj, 0, sizeof(obj));
+	obj[1].handle = gem_create(fd, 4096);
+	obj[1].flags = EXEC_OBJECT_WRITE;
+	results = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_READ);
+
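+	/*
+	 * One 64-byte batch per submission; 64KiB of batch space covers
+	 * the 1024-batch cap placed on ring_size.
+	 */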
+	obj[2].handle = gem_create(fd, 64*1024);
+	map = gem_mmap__wc(fd, obj[2].handle, 0, 64*1024, PROT_WRITE);
+	gem_set_domain(fd, obj[2].handle,
+		       I915_GEM_DOMAIN_GTT,
+		       I915_GEM_DOMAIN_GTT);
+	map[0] = MI_BATCH_BUFFER_END;
+	gem_execbuf(fd, &execbuf);
+
+	memset(&reloc, 0, sizeof(reloc));
+	obj[2].relocation_count = 1;
+	obj[2].relocs_ptr = to_user_pointer(&reloc);
+
+	gem_set_domain(fd, obj[2].handle,
+		       I915_GEM_DOMAIN_GTT,
+		       I915_GEM_DOMAIN_GTT);
+
+	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+	reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+	reloc.presumed_offset = obj[1].offset;
+	reloc.target_handle = flags & CORK ? 1 : 0;
+
+	for_each_physical_engine(fd, other) {
+		igt_spin_t *spin = NULL;
+		IGT_CORK_HANDLE(c);
+
+		gem_set_domain(fd, obj[2].handle,
+			       I915_GEM_DOMAIN_GTT,
+			       I915_GEM_DOMAIN_GTT);
+
+		if (flags & PREEMPT)
+			spin = __igt_spin_batch_new(fd,
+						    .ctx = ctx[0],
+						    .engine = ring);
+
+		if (flags & CORK) {
+			obj[0].handle = igt_cork_plug(&c, fd);
+			execbuf.buffers_ptr = to_user_pointer(&obj[0]);
+			execbuf.buffer_count = 3;
+		}
+
+		for (j = 0; j < repeats; j++) {
+			uint64_t offset;
+
+			execbuf.flags &= ~ENGINE_FLAGS;
+			execbuf.flags |= ring;
+
+			execbuf.batch_start_offset = 64 * j;
+			reloc.offset =
+				execbuf.batch_start_offset + sizeof(uint32_t);
+			reloc.delta = sizeof(uint32_t) * j;
+
+			reloc.presumed_offset = obj[1].offset;
+			offset = reloc.presumed_offset;
+			offset += reloc.delta;
+
+			i = 16 * j;
+			/* MI_STORE_REG_MEM */
+			map[i++] = 0x24 << 23 | 1;
+			if (has_64bit_reloc)
+				map[i-1]++;
+			map[i++] = RCS_TIMESTAMP; /* ring local! */
+			map[i++] = offset;
+			if (has_64bit_reloc)
+				map[i++] = offset >> 32;
+			map[i++] = MI_BATCH_BUFFER_END;
+
+			gem_execbuf(fd, &execbuf);
+
+			execbuf.flags &= ~ENGINE_FLAGS;
+			execbuf.flags |= other;
+
+			execbuf.batch_start_offset = 64 * (j + repeats);
+			reloc.offset =
+				execbuf.batch_start_offset + sizeof(uint32_t);
+			reloc.delta = sizeof(uint32_t) * (j + repeats);
+
+			reloc.presumed_offset = obj[1].offset;
+			offset = reloc.presumed_offset;
+			offset += reloc.delta;
+
+			i = 16 * (j + repeats);
+			/* MI_STORE_REG_MEM */
+			map[i++] = 0x24 << 23 | 1;
+			if (has_64bit_reloc)
+				map[i-1]++;
+			map[i++] = RCS_TIMESTAMP; /* ring local! */
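+			/*
+			 * (TIMESTAMP sits at offset 0x358 from each engine's
+			 * mmio base; RCS_TIMESTAMP is the render engine's
+			 * copy, sampled from every ring for a common
+			 * timebase.)
+			 */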
+			map[i++] = offset;
+			if (has_64bit_reloc)
+				map[i++] = offset >> 32;
+			map[i++] = MI_BATCH_BUFFER_END;
+
+			gem_execbuf(fd, &execbuf);
+		}
+
+		if (flags & CORK)
+			igt_cork_unplug(&c);
+		gem_set_domain(fd, obj[1].handle,
+			       I915_GEM_DOMAIN_GTT,
+			       I915_GEM_DOMAIN_GTT);
+		igt_spin_batch_free(fd, spin);
+
+		igt_info("%s-%s delay: %.2f\n",
+			 name, e__->name,
+			 (results[2*repeats-1] - results[0]) / (double)repeats);
+	}
+
+	munmap(map, 64*1024);
+	munmap(results, 4096);
+
+	if (flags & CORK)
+		gem_close(fd, obj[0].handle);
+	gem_close(fd, obj[1].handle);
+	gem_close(fd, obj[2].handle);
+
+	if (flags & PREEMPT) {
+		gem_context_destroy(fd, ctx[1]);
+		gem_context_destroy(fd, ctx[0]);
+	}
+}
+
+static void __rearm_spin_batch(igt_spin_t *spin)
+{
+	const uint32_t mi_arb_chk = 0x5 << 23;
+
+	*spin->batch = mi_arb_chk;
+	*spin->running = 0;
+	__sync_synchronize();
+}
+
+static void
+__submit_spin_batch(int fd, igt_spin_t *spin, unsigned int flags)
+{
+	struct drm_i915_gem_execbuffer2 eb = spin->execbuf;
+
+	eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
+	eb.flags |= flags | I915_EXEC_NO_RELOC;
+
+	gem_execbuf(fd, &eb);
+}
+
+struct rt_pkt {
+	struct igt_mean mean;
+	double min, max;
+};
+
+static bool __spin_wait(int fd, igt_spin_t *spin)
+{
+	while (!READ_ONCE(*spin->running)) {
+		if (!gem_bo_busy(fd, spin->handle))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Test whether an RT thread which hogs the CPU a lot can submit work with
+ * reasonable latency.
+ */
+static void
+rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned int flags)
+#define RTIDLE 0x1
+{
+	const char *passname[] = {
+		"warmup",
+		"normal",
+		"rt[0]",
+		"rt[1]",
+		"rt[2]",
+		"rt[3]",
+		"rt[4]",
+		"rt[5]",
+		"rt[6]",
+	};
+#define NPASS ARRAY_SIZE(passname)
+#define MMAP_SZ (64 << 10)
+	const struct igt_spin_factory opts = {
+		.engine = engine,
+		.flags = IGT_SPIN_POLL_RUN | IGT_SPIN_FAST,
+	};
+	struct rt_pkt *results;
+	unsigned int engines[16];
+	const char *names[16];
+	unsigned int nengine;
+	int ret;
+
+	igt_assert(ARRAY_SIZE(engines) * NPASS * sizeof(*results) <= MMAP_SZ);
+	results = mmap(NULL, MMAP_SZ, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(results != MAP_FAILED);
+
+	nengine = 0;
+	if (engine == ALL_ENGINES) {
+		for_each_physical_engine(fd, engine) {
+			if (!gem_can_store_dword(fd, engine))
+				continue;
+
+			engines[nengine] = engine;
+			names[nengine] = e__->name;
+			nengine++;
+		}
+		igt_require(nengine > 1);
+	} else {
+		igt_require(gem_can_store_dword(fd, engine));
+		engines[nengine] = engine;
+		names[nengine] = name;
+		nengine++;
+	}
+
+	gem_quiescent_gpu(fd);
+
+	igt_fork(child, nengine) {
+		unsigned int pass = 0; /* Three phases: warmup, normal, rt. */
+
+		engine = engines[child];
+		do {
+			struct igt_mean mean;
+			double min = HUGE_VAL;
+			double max = -HUGE_VAL;
+			igt_spin_t *spin;
+
+			igt_mean_init(&mean);
+
+			if (pass == 2) {
+				struct sched_param rt =
+					{ .sched_priority = 99 };
+
+				ret = sched_setscheduler(0,
+							 SCHED_FIFO | SCHED_RESET_ON_FORK,
+							 &rt);
+				if (ret) {
+					igt_warn("Failed to set scheduling policy!\n");
+					break;
+				}
+			}
+
+			usleep(250);
+
+			spin = __igt_spin_batch_factory(fd, &opts);
+			if (!spin) {
+				igt_warn("Failed to create spinner! (%s)\n",
+					 passname[pass]);
+				break;
+			}
+			igt_spin_busywait_until_running(spin);
+
+			igt_until_timeout(pass > 0 ? 5 : 2) {
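+				/*
+				 * Pass 0 is only a short 2s warmup; the
+				 * measured passes each run for 5s.
+				 */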
+				struct timespec ts = { };
+				double t;
+
+				igt_spin_batch_end(spin);
+				gem_sync(fd, spin->handle);
+				if (flags & RTIDLE)
+					igt_drop_caches_set(fd, DROP_IDLE);
+
+				/*
+				 * If we are oversubscribed (more RT hogs than
+				 * cpus) give the others a chance to run;
+				 * otherwise, they will interrupt us in the
+				 * middle of the measurement.
+				 */
+				if (nengine > 1)
+					usleep(10*nengine);
+
+				__rearm_spin_batch(spin);
+
+				igt_nsec_elapsed(&ts);
+				__submit_spin_batch(fd, spin, engine);
+				if (!__spin_wait(fd, spin)) {
+					igt_warn("Wait timeout! (%s)\n",
+						 passname[pass]);
+					break;
+				}
+
+				t = igt_nsec_elapsed(&ts) * 1e-9;
+				if (t > max)
+					max = t;
+				if (t < min)
+					min = t;
+
+				igt_mean_add(&mean, t);
+			}
+
+			igt_spin_batch_free(fd, spin);
+
+			igt_info("%8s %10s: mean=%.2fus stddev=%.3fus [%.2fus, %.2fus] (n=%lu)\n",
+				 names[child],
+				 passname[pass],
+				 igt_mean_get(&mean) * 1e6,
+				 sqrt(igt_mean_get_variance(&mean)) * 1e6,
+				 min * 1e6, max * 1e6,
+				 mean.count);
+
+			results[NPASS * child + pass].mean = mean;
+			results[NPASS * child + pass].min = min;
+			results[NPASS * child + pass].max = max;
+		} while (++pass < NPASS);
+	}
+
+	igt_waitchildren();
+
+	for (unsigned int child = 0; child < nengine; child++) {
+		struct rt_pkt normal = results[NPASS * child + 1];
+		igt_stats_t stats;
+		double variance = 0;
+
+		igt_stats_init_with_size(&stats, NPASS);
+
+		for (unsigned int pass = 2; pass < NPASS; pass++) {
+			struct rt_pkt *rt = &results[NPASS * child + pass];
+
+			igt_assert(rt->max);
+
+			igt_stats_push_float(&stats, igt_mean_get(&rt->mean));
+			variance += igt_mean_get_variance(&rt->mean);
+		}
+		variance /= NPASS - 2;
+
+		igt_info("%8s: normal latency=%.2f±%.3fus, rt latency=%.2f±%.3fus\n",
+			 names[child],
+			 igt_mean_get(&normal.mean) * 1e6,
+			 sqrt(igt_mean_get_variance(&normal.mean)) * 1e6,
+			 igt_stats_get_median(&stats) * 1e6,
+			 sqrt(variance) * 1e6);
+
+		igt_assert(igt_stats_get_median(&stats) <
+			   igt_mean_get(&normal.mean) * 2);
+
+		/* The system is noisy; be conservative when declaring fail. */
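+		/*
+		 * "Conservative" here: tolerate up to 10x the variance of
+		 * the normal pass before failing.
+		 */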
+		igt_assert(variance < igt_mean_get_variance(&normal.mean) * 10);
+	}
+
+	munmap(results, MMAP_SZ);
+}
+
+igt_main
+{
+	const struct intel_execution_engine *e;
+	int device = -1;
+
+	igt_fixture {
+		device = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(device);
+		gem_require_mmap_wc(device);
+
+		gem_submission_print_method(device);
+
+		ring_size = gem_measure_ring_inflight(device, ALL_ENGINES, 0);
+		igt_info("Ring size: %d batches\n", ring_size);
+		igt_require(ring_size > 8);
+		ring_size -= 8; /* leave some spare */
+		if (ring_size > 1024)
+			ring_size = 1024;
+
+		intel_register_access_init(intel_get_pci_device(), false, device);
+	}
+
+	igt_subtest("all-rtidle-submit")
+		rthog_latency_on_ring(device, ALL_ENGINES, "all", RTIDLE);
+
+	igt_subtest("all-rthog-submit")
+		rthog_latency_on_ring(device, ALL_ENGINES, "all", 0);
+
+	igt_subtest_group {
+		igt_fixture
+			igt_require(intel_gen(intel_get_drm_devid(device)) >= 7);
+
+		for (e = intel_execution_engines; e->name; e++) {
+			if (e->exec_id == 0)
+				continue;
+
+			igt_subtest_group {
+				igt_fixture {
+					igt_require(gem_ring_has_physical_engine(device, e->exec_id | e->flags));
+				}
+
+				igt_subtest_f("%s-dispatch", e->name)
+					latency_on_ring(device,
+							e->exec_id | e->flags,
+							e->name, 0);
+
+				igt_subtest_f("%s-poll", e->name)
+					poll_ring(device,
+						  e->exec_id | e->flags,
+						  e->name);
+
+				igt_subtest_f("%s-rtidle-submit", e->name)
+					rthog_latency_on_ring(device,
+							      e->exec_id |
+							      e->flags,
+							      e->name,
+							      RTIDLE);
+
+				igt_subtest_f("%s-rthog-submit", e->name)
+					rthog_latency_on_ring(device,
+							      e->exec_id |
+							      e->flags,
+							      e->name,
+							      0);
+
+				igt_subtest_f("%s-dispatch-queued", e->name)
+					latency_on_ring(device,
+							e->exec_id | e->flags,
+							e->name, CORK);
+
+				igt_subtest_f("%s-synchronisation", e->name)
+					latency_from_ring(device,
+							  e->exec_id | e->flags,
+							  e->name, 0);
+
+				igt_subtest_f("%s-synchronisation-queued", e->name)
+					latency_from_ring(device,
+							  e->exec_id | e->flags,
+							  e->name, CORK);
+
+				igt_subtest_group {
+					igt_fixture {
+						gem_require_contexts(device);
+						igt_require(gem_scheduler_has_preemption(device));
+					}
+
+					igt_subtest_f("%s-preemption", e->name)
+						latency_from_ring(device,
+								  e->exec_id | e->flags,
+								  e->name, PREEMPT);
+				}
+			}
+		}
+	}
+
+	igt_fixture {
+		close(device);
+	}
+}