/* * Copyright © 2019 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include "igt_params.h" #include "drmtest.h" #include "i915/gem.h" #include "i915/gem_create.h" #include "i915/gem_engine_topology.h" #include "i915/gem_mman.h" #include "igt_dummyload.h" #include "igt_sysfs.h" #include "intel_allocator.h" #include "ioctl_wrappers.h" #include "intel_chipset.h" #include "intel_reg.h" #include "sw_sync.h" #define ATTR "timeslice_duration_ms" #define RESET_TIMEOUT 50 /* milliseconds, at least one jiffie for kworker */ #define MI_SEMAPHORE_WAIT (0x1c << 23) #define MI_SEMAPHORE_POLL (1 << 15) #define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) #define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) #define MI_SEMAPHORE_SAD_LT_SDD (2 << 12) #define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12) #define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12) #define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12) static bool __enable_hangcheck(int dir, bool state) { return igt_sysfs_set(dir, "enable_hangcheck", state ? "1" : "0"); } static bool enable_hangcheck(int i915, bool state) { bool success; int dir; dir = igt_params_open(i915); if (dir < 0) /* no parameters, must be default! */ return false; success = __enable_hangcheck(dir, state); close(dir); return success; } static void set_timeslice(int engine, unsigned int value) { unsigned int delay; igt_sysfs_printf(engine, ATTR, "%u", value); igt_sysfs_scanf(engine, ATTR, "%u", &delay); igt_assert_eq(delay, value); } static int wait_for_reset(int fence) { /* Do a double wait to paper over scheduler fluctuations */ sync_fence_wait(fence, RESET_TIMEOUT); return sync_fence_wait(fence, RESET_TIMEOUT); } static void test_idempotent(int i915, int engine) { const unsigned int delays[] = { 0, 1, 1234, 654321 }; unsigned int saved; /* Quick test to verify the kernel reports the same values as we write */ igt_assert(igt_sysfs_scanf(engine, ATTR, "%u", &saved) == 1); igt_debug("Initial %s:%u\n", ATTR, saved); for (int i = 0; i < ARRAY_SIZE(delays); i++) set_timeslice(engine, delays[i]); set_timeslice(engine, saved); } static void test_invalid(int i915, int engine) { unsigned int saved, delay; /* Quick test that non-representable delays are rejected */ igt_assert(igt_sysfs_scanf(engine, ATTR, "%u", &saved) == 1); igt_debug("Initial %s:%u\n", ATTR, saved); igt_sysfs_printf(engine, ATTR, "%llu", -1ull); igt_sysfs_scanf(engine, ATTR, "%u", &delay); igt_assert_eq(delay, saved); igt_sysfs_printf(engine, ATTR, "%d", -1); igt_sysfs_scanf(engine, ATTR, "%u", &delay); igt_assert_eq(delay, saved); igt_sysfs_printf(engine, ATTR, "%llu", 123ull << 32); igt_sysfs_scanf(engine, ATTR, "%u", &delay); igt_assert_eq(delay, saved); } static void set_unbannable(int i915, uint32_t ctx) { struct drm_i915_gem_context_param p = { .ctx_id = ctx, .param = I915_CONTEXT_PARAM_BANNABLE, }; gem_context_set_param(i915, &p); } static const intel_ctx_t * create_ctx(int i915, unsigned int class, unsigned int inst, int prio) { const intel_ctx_t *ctx = intel_ctx_create_for_engine(i915, class, inst); set_unbannable(i915, ctx->id); gem_context_set_priority(i915, ctx->id, prio); return ctx; } static int cmp_u32(const void *_a, const void *_b) { const uint32_t *a = _a, *b = _b; return *a - *b; } static double clockrate(int i915) { int freq; drm_i915_getparam_t gp = { .value = &freq, .param = I915_PARAM_CS_TIMESTAMP_FREQUENCY, }; igt_require(igt_ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp) == 0); return 1e9 / freq; } static uint64_t __test_duration(int i915, int engine, unsigned int timeout) { struct drm_i915_gem_exec_object2 obj[3] = { { .handle = gem_create(i915, 4096), .offset = 0, .flags = EXEC_OBJECT_PINNED, }, { .handle = gem_create(i915, 4096), .offset = 4096, .flags = EXEC_OBJECT_PINNED, }, { gem_create(i915, 4096) } }; struct drm_i915_gem_execbuffer2 eb = { .buffer_count = ARRAY_SIZE(obj), .buffers_ptr = to_user_pointer(obj), }; const unsigned int gen = intel_gen(intel_get_drm_devid(i915)); double duration = clockrate(i915); unsigned int class, inst, mmio; uint32_t *cs, *map; const intel_ctx_t *ctx[2]; int start; int i; igt_require(gem_scheduler_has_preemption(i915)); igt_require(gen >= 8); /* MI_SEMAPHORE_WAIT */ igt_assert(igt_sysfs_scanf(engine, "class", "%u", &class) == 1); igt_assert(igt_sysfs_scanf(engine, "instance", "%u", &inst) == 1); igt_require(igt_sysfs_scanf(engine, "mmio_base", "%x", &mmio) == 1); set_timeslice(engine, timeout); ctx[0] = create_ctx(i915, class, inst, 0); ctx[1] = create_ctx(i915, class, inst, 0); map = gem_mmap__device_coherent(i915, obj[2].handle, 0, 4096, PROT_WRITE); gem_set_domain(i915, obj[2].handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); cs = map; for (i = 0; i < 10; i++) { *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_NEQ_SDD | (4 - 2 + (gen >= 12)); *cs++ = 0; *cs++ = obj[0].offset + sizeof(uint32_t) * i; *cs++ = 0; if (gen >= 12) *cs++ = 0; *cs++ = 0x24 << 23 | 2; /* SRM */ *cs++ = mmio + 0x358; *cs++ = obj[1].offset + sizeof(uint32_t) * i; *cs++ = 0; *cs++ = MI_STORE_DWORD_IMM; *cs++ = obj[0].offset + 4096 - sizeof(uint32_t) * i - sizeof(uint32_t); *cs++ = 0; *cs++ = 1; } *cs++ = MI_BATCH_BUFFER_END; cs += 16 - ((cs - map) & 15); start = (cs - map) * sizeof(*cs); for (i = 0; i < 10; i++) { *cs++ = MI_STORE_DWORD_IMM; *cs++ = obj[0].offset + sizeof(uint32_t) * i; *cs++ = 0; *cs++ = 1; *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_NEQ_SDD | (4 - 2 + (gen >= 12)); *cs++ = 0; *cs++ = obj[0].offset + 4096 - sizeof(uint32_t) * i - sizeof(uint32_t); *cs++ = 0; if (gen >= 12) *cs++ = 0; } *cs++ = MI_BATCH_BUFFER_END; igt_assert(cs - map < 4096 / sizeof(*cs)); munmap(map, 4096); eb.rsvd1 = ctx[0]->id; gem_execbuf(i915, &eb); eb.rsvd1 = ctx[1]->id; eb.batch_start_offset = start; gem_execbuf(i915, &eb); gem_sync(i915, obj[2].handle); gem_set_domain(i915, obj[1].handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); map = gem_mmap__device_coherent(i915, obj[1].handle, 0, 4096, PROT_WRITE); for (i = 0; i < 9; i++) map[i] = map[i + 1] - map[i]; qsort(map, 9, sizeof(*map), cmp_u32); duration *= map[4] / 2; /* 2 sema-waits between timestamp updates */ munmap(map, 4096); for (i = 0; i < ARRAY_SIZE(ctx); i++) intel_ctx_destroy(i915, ctx[i]); for (i = 0; i < ARRAY_SIZE(obj); i++) gem_close(i915, obj[i].handle); return duration; } static unsigned int set_heartbeat(int engine, unsigned int value) { const char *attr= "heartbeat_interval_ms"; unsigned int old = ~value, new; igt_debug("set %s:%d\n", attr, value); igt_sysfs_scanf(engine, attr, "%u", &old); igt_require(igt_sysfs_printf(engine, attr, "%u", value) > 0); igt_sysfs_scanf(engine, attr, "%u", &new); igt_assert_eq(new, value); return old; } static void disable_heartbeat(int engine, unsigned int *saved) { *saved = set_heartbeat(engine, 0); } static void enable_heartbeat(int engine, unsigned int saved) { set_heartbeat(engine, saved); } static void test_duration(int i915, int engine) { int delays[] = { 1, 50, 100, 500 }; unsigned int saved, heartbeat; uint64_t elapsed; int epsilon; /* * Timeslicing at its very basic level is sharing the GPU by * running one context for interval before running another. After * each interval the running context is swapped for another runnable * context. * * We can measure this directly by watching the xCS_TIMESTAMP and * recording its value every time we switch into the context, using * a couple of semaphores to busyspin for the timeslice. */ igt_assert(igt_sysfs_scanf(engine, ATTR, "%u", &saved) == 1); igt_debug("Initial %s:%u\n", ATTR, saved); gem_quiescent_gpu(i915); disable_heartbeat(engine, &heartbeat); elapsed = __test_duration(i915, engine, 1); epsilon = 2 * elapsed / 1000 / 1000; if (epsilon < 50) epsilon = 50; igt_info("Minimum duration measured as %.3fms; setting error threshold to %dms\n", elapsed * 1e-6, epsilon); igt_require(epsilon < 1000); for (int i = 0; i < ARRAY_SIZE(delays); i++) { elapsed = __test_duration(i915, engine, delays[i]); igt_info("%s:%d, elapsed=%.3fms\n", ATTR, delays[i], elapsed * 1e-6); /* * We need to give a couple of jiffies slack for the scheduler * timeouts and then a little more slack fr the overhead in * submitting and measuring. 50ms should cover all of our sins * and be useful tolerance. */ igt_assert_f(elapsed / 1000 / 1000 < delays[i] + epsilon, "Timeslice exceeded request!\n"); } enable_heartbeat(engine, heartbeat); gem_quiescent_gpu(i915); set_timeslice(engine, saved); } static uint64_t __test_timeout(int i915, int engine, unsigned int timeout) { unsigned int class, inst; struct timespec ts = {}; igt_spin_t *spin[2]; uint64_t elapsed; const intel_ctx_t *ctx[2]; uint64_t ahnd[2]; igt_assert(igt_sysfs_scanf(engine, "class", "%u", &class) == 1); igt_assert(igt_sysfs_scanf(engine, "instance", "%u", &inst) == 1); set_timeslice(engine, timeout); ctx[0] = create_ctx(i915, class, inst, 0); ahnd[0] = get_reloc_ahnd(i915, ctx[0]->id); spin[0] = igt_spin_new(i915, .ahnd = ahnd[0], .ctx = ctx[0], .flags = (IGT_SPIN_NO_PREEMPTION | IGT_SPIN_POLL_RUN | IGT_SPIN_FENCE_OUT)); igt_spin_busywait_until_started(spin[0]); ctx[1] = create_ctx(i915, class, inst, 0); ahnd[1] = get_reloc_ahnd(i915, ctx[1]->id); igt_nsec_elapsed(&ts); spin[1] = igt_spin_new(i915, .ahnd = ahnd[1], .ctx = ctx[1], .flags = IGT_SPIN_POLL_RUN); igt_spin_busywait_until_started(spin[1]); elapsed = igt_nsec_elapsed(&ts); igt_spin_free(i915, spin[1]); igt_assert_eq(wait_for_reset(spin[0]->out_fence), 0); igt_assert_eq(sync_fence_status(spin[0]->out_fence), -EIO); igt_spin_free(i915, spin[0]); intel_ctx_destroy(i915, ctx[1]); intel_ctx_destroy(i915, ctx[0]); put_ahnd(ahnd[1]); put_ahnd(ahnd[0]); gem_quiescent_gpu(i915); return elapsed; } static void test_timeout(int i915, int engine) { int delays[] = { 1, 50, 100, 500 }; unsigned int saved; uint64_t elapsed; int epsilon; /* * Timeslicing requires us to preempt the running context in order to * switch into its contemporary. If we couple a unpreemptable hog * with a fast forced reset, we can measure the timeslice by how long * it takes for the hog to be reset and the high priority context * to complete. */ igt_require(igt_sysfs_printf(engine, "preempt_timeout_ms", "%u", 1) == 1); igt_assert(igt_sysfs_scanf(engine, ATTR, "%u", &saved) == 1); igt_debug("Initial %s:%u\n", ATTR, saved); gem_quiescent_gpu(i915); igt_require(enable_hangcheck(i915, false)); elapsed = __test_timeout(i915, engine, 1); epsilon = 2 * elapsed / 1000 / 1000; if (epsilon < 50) epsilon = 50; igt_info("Minimum timeout measured as %.3fms; setting error threshold to %dms\n", elapsed * 1e-6, epsilon); igt_require(epsilon < 1000); for (int i = 0; i < ARRAY_SIZE(delays); i++) { elapsed = __test_timeout(i915, engine, delays[i]); igt_info("%s:%d, elapsed=%.3fms\n", ATTR, delays[i], elapsed * 1e-6); /* * We need to give a couple of jiffies slack for the scheduler * timeouts and then a little more slack fr the overhead in * submitting and measuring. 50ms should cover all of our sins * and be useful tolerance. */ igt_assert_f(elapsed / 1000 / 1000 < delays[i] + epsilon, "Timeslice exceeded request!\n"); } igt_assert(enable_hangcheck(i915, true)); gem_quiescent_gpu(i915); set_timeslice(engine, saved); } static void test_off(int i915, int engine) { unsigned int class, inst; unsigned int saved; igt_spin_t *spin[2]; const intel_ctx_t *ctx[2]; uint64_t ahnd[2]; /* * As always, there are some who must run uninterrupted and simply do * not want to share the GPU even for a microsecond. Those greedy * clients can disable timeslicing entirely, and so set the timeslice * to 0. We test that a hog is not preempted within the 150s of * our boredom threshold. */ igt_require(igt_sysfs_printf(engine, "preempt_timeout_ms", "%u", 1) == 1); igt_assert(igt_sysfs_scanf(engine, ATTR, "%u", &saved) == 1); igt_debug("Initial %s:%u\n", ATTR, saved); gem_quiescent_gpu(i915); igt_require(enable_hangcheck(i915, false)); igt_assert(igt_sysfs_scanf(engine, "class", "%u", &class) == 1); igt_assert(igt_sysfs_scanf(engine, "instance", "%u", &inst) == 1); set_timeslice(engine, 0); ctx[0] = create_ctx(i915, class, inst, 0); ahnd[0] = get_reloc_ahnd(i915, ctx[0]->id); spin[0] = igt_spin_new(i915, .ahnd = ahnd[0], .ctx = ctx[0], .flags = (IGT_SPIN_NO_PREEMPTION | IGT_SPIN_POLL_RUN | IGT_SPIN_FENCE_OUT)); igt_spin_busywait_until_started(spin[0]); ctx[1] = create_ctx(i915, class, inst, 0); ahnd[1] = get_reloc_ahnd(i915, ctx[1]->id); spin[1] = igt_spin_new(i915, .ahnd = ahnd[1], .ctx = ctx[1], .flags = IGT_SPIN_POLL_RUN); for (int i = 0; i < 150; i++) { igt_assert_eq(sync_fence_status(spin[0]->out_fence), 0); sleep(1); } set_timeslice(engine, 1); igt_spin_busywait_until_started(spin[1]); igt_spin_free(i915, spin[1]); igt_assert_eq(wait_for_reset(spin[0]->out_fence), 0); igt_assert_eq(sync_fence_status(spin[0]->out_fence), -EIO); igt_spin_free(i915, spin[0]); intel_ctx_destroy(i915, ctx[1]); intel_ctx_destroy(i915, ctx[0]); put_ahnd(ahnd[1]); put_ahnd(ahnd[0]); igt_assert(enable_hangcheck(i915, true)); gem_quiescent_gpu(i915); set_timeslice(engine, saved); } igt_main { static const struct { const char *name; void (*fn)(int, int); } tests[] = { { "idempotent", test_idempotent }, { "invalid", test_invalid }, { "duration", test_duration }, { "timeout", test_timeout }, { "off", test_off }, { } }; int i915 = -1, engines = -1; igt_fixture { int sys; i915 = drm_open_driver(DRIVER_INTEL); igt_require_gem(i915); igt_allow_hang(i915, 0, 0); sys = igt_sysfs_open(i915); igt_require(sys != -1); engines = openat(sys, "engine", O_RDONLY); igt_require(engines != -1); close(sys); } for (typeof(*tests) *t = tests; t->name; t++) igt_subtest_with_dynamic(t->name) dyn_sysfs_engines(i915, engines, ATTR, t->fn); igt_fixture { close(engines); close(i915); } }