/*
 * Copyright © 2021 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "config.h"

#include <alloca.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#include "i915/gem.h"
#include "i915/gem_create.h"
#include "i915/gem_vm.h"
#include "igt.h"
#include "igt_params.h"
#include "sw_sync.h"

#define EWATCHDOG EINTR

static unsigned int default_timeout_wait_s;
static const unsigned int watchdog_us = 500 * 1000;

static unsigned int
wait_timeout(int i915, igt_spin_t **spin, unsigned int num_engines,
	     unsigned int wait_us, unsigned int expect)
{
	unsigned int count_idle = 0, count_fence = 0, count_started = 0, i;
	bool started[num_engines];

	memset(started, 0, sizeof(started));
	while (count_started < num_engines) {
		for (i = 0; i < num_engines; i++) {
			if (started[i])
				continue;

			if (igt_spin_has_started(spin[i])) {
				started[i] = true;
				count_started++;
			}
		}
	}

	igt_until_timeout(DIV_ROUND_UP(wait_us, USEC_PER_SEC)) {
		usleep(watchdog_us / 2);

		for (i = 0, count_idle = 0; i < num_engines; i++) {
			if (!gem_bo_busy(i915, spin[i]->handle))
				count_idle++;
		}

		for (i = 0, count_fence = 0; i < num_engines; i++) {
			if (sync_fence_status(spin[i]->out_fence))
				count_fence++;
		}

		if (count_idle == num_engines)
			break;
	}

	if (count_idle < expect) {
		for (i = 0; i < num_engines; i++) {
			if (gem_bo_busy(i915, spin[i]->handle))
				igt_warn("Request %u/%u not cancelled!\n",
					 i + 1, num_engines);
		}
	}

	if (count_fence < expect) {
		for (i = 0; i < num_engines; i++) {
			if (!sync_fence_status(spin[i]->out_fence))
				igt_warn("Fence %u/%u not timed out!\n",
					 i + 1, num_engines);
		}
	}

	igt_assert_eq(count_idle, count_fence);

	return count_fence;
}

static unsigned int spin_flags(void)
{
	return IGT_SPIN_POLL_RUN | IGT_SPIN_FENCE_OUT;
}

static void physical(int i915, const intel_ctx_t *ctx)
{
	const unsigned int wait_us = default_timeout_wait_s * USEC_PER_SEC;
	unsigned int num_engines, i, count;
	const struct intel_execution_engine2 *e;
	igt_spin_t *spin[GEM_MAX_ENGINES];
	uint64_t ahnd = get_reloc_ahnd(i915, ctx->id);

	i = 0;
	for_each_ctx_engine(i915, ctx, e) {
		spin[i] = igt_spin_new(i915,
				       .ahnd = ahnd,
				       .ctx = ctx,
				       .engine = e->flags,
				       .flags = spin_flags());
		i++;
	}
	num_engines = i;

	count = wait_timeout(i915, spin, num_engines, wait_us, num_engines);

	for (i = 0; i < num_engines; i++)
		igt_spin_free(i915, spin[i]);
	put_ahnd(ahnd);

	igt_assert_eq(count, num_engines);
}
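
/*
 * Return a freshly allocated array of all physical engines of @class found
 * in the context configuration, with the number of matches written to @out,
 * or NULL if the class has no engines. The caller frees the array.
 */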
static struct i915_engine_class_instance *
list_engines(const intel_ctx_cfg_t *cfg, unsigned int class, unsigned int *out)
{
	struct i915_engine_class_instance *ci;
	unsigned int count = 0, i;

	ci = malloc(cfg->num_engines * sizeof(*ci));
	igt_assert(ci);

	for (i = 0; i < cfg->num_engines; i++) {
		if (class == cfg->engines[i].engine_class)
			ci[count++] = cfg->engines[i];
	}

	if (!count) {
		free(ci);
		ci = NULL;
	}

	*out = count;
	return ci;
}

static size_t sizeof_load_balance(int count)
{
	return offsetof(struct i915_context_engines_load_balance,
			engines[count]);
}

static size_t sizeof_param_engines(int count)
{
	return offsetof(struct i915_context_param_engines,
			engines[count]);
}

#define alloca0(sz) ({ size_t sz__ = (sz); memset(alloca(sz__), 0, sz__); })

static int __set_load_balancer(int i915, uint32_t ctx,
			       const struct i915_engine_class_instance *ci,
			       unsigned int count,
			       void *ext)
{
	struct i915_context_engines_load_balance *balancer =
		alloca0(sizeof_load_balance(count));
	struct i915_context_param_engines *engines =
		alloca0(sizeof_param_engines(count + 1));
	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof_param_engines(count + 1),
		.value = to_user_pointer(engines)
	};

	balancer->base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
	balancer->base.next_extension = to_user_pointer(ext);

	igt_assert(count);
	balancer->num_siblings = count;
	memcpy(balancer->engines, ci, count * sizeof(*ci));

	engines->extensions = to_user_pointer(balancer);
	engines->engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
	engines->engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
	memcpy(engines->engines + 1, ci, count * sizeof(*ci));

	return __gem_context_set_param(i915, &p);
}

static void set_load_balancer(int i915, uint32_t ctx,
			      const struct i915_engine_class_instance *ci,
			      unsigned int count,
			      void *ext)
{
	igt_assert_eq(__set_load_balancer(i915, ctx, ci, count, ext), 0);
}

static void virtual(int i915, const intel_ctx_cfg_t *base_cfg)
{
	const unsigned int wait_us = default_timeout_wait_s * USEC_PER_SEC;
	unsigned int num_engines = base_cfg->num_engines, i, count;
	igt_spin_t *spin[num_engines];
	unsigned int expect = num_engines;
	intel_ctx_cfg_t cfg = {};
	const intel_ctx_t *ctx[num_engines];
	uint64_t ahnd;

	igt_require(gem_using_execlists(i915) ||
		    gem_using_guc_submission(i915));

	igt_debug("%u virtual engines\n", num_engines);
	igt_require(num_engines);

	cfg.vm = gem_vm_create(i915);

	i = 0;
	for (int class = 0; class < 32; class++) {
		struct i915_engine_class_instance *ci;

		ci = list_engines(base_cfg, class, &count);
		if (!ci)
			continue;

		for (int pass = 0; pass < count; pass++) {
			igt_assert(sizeof(*ci) == sizeof(int));
			igt_permute_array(ci, count, igt_exchange_int);

			igt_assert(i < num_engines);

			ctx[i] = intel_ctx_create(i915, &cfg);
			ahnd = get_reloc_ahnd(i915, ctx[i]->id);

			set_load_balancer(i915, ctx[i]->id, ci, count, NULL);

			spin[i] = igt_spin_new(i915,
					       .ahnd = ahnd,
					       .ctx = ctx[i],
					       .flags = spin_flags());
			i++;
		}

		free(ci);
	}

	count = wait_timeout(i915, spin, num_engines, wait_us, expect);

	for (i = 0; i < num_engines && spin[i]; i++) {
		ahnd = spin[i]->opts.ahnd;
		igt_spin_free(i915, spin[i]);
		intel_ctx_destroy(i915, ctx[i]);
		put_ahnd(ahnd);
	}

	igt_assert_eq(count, expect);
}

static unsigned int offset_in_page(void *addr)
{
	return (uintptr_t)addr & 4095;
}

static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
{
	return (x + y - 1) / y;
}

static int read_timestamp_frequency(int i915)
{
	int value = 0;
	drm_i915_getparam_t gp = {
		.value = &value,
		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
	};

	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);

	return value;
}
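
/*
 * Convert a CPU interval in nanoseconds into command streamer timestamp
 * ticks, rounding up, based on the CS timestamp frequency reported by the
 * kernel.
 */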
static uint64_t ns_to_ticks(int i915, uint64_t ns)
{
	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
				  NSEC_PER_SEC);
}

static uint32_t __batch_create(int i915, uint32_t offset)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	uint32_t handle;

	handle = gem_create(i915, ALIGN(offset + 4, 4096));
	gem_write(i915, handle, offset, &bbe, sizeof(bbe));

	return handle;
}

static uint32_t batch_create(int i915)
{
	return __batch_create(i915, 0);
}

static void delay(int i915,
		  const struct intel_execution_engine2 *e,
		  uint32_t handle,
		  uint64_t addr,
		  uint64_t ns)
{
	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
	const uint32_t base = gem_engine_mmio_base(i915, e->name);
#define CS_GPR(x) (base + 0x600 + 8 * (x))
#define RUNTIME (base + 0x3a8)
	enum { START_TS, NOW_TS };
	uint32_t *map, *cs, *jmp;

	igt_require(base);

	/* Loop until CTX_TIMESTAMP - initial > @ns */

	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);

	*cs++ = MI_LOAD_REGISTER_IMM;
	*cs++ = CS_GPR(START_TS) + 4;
	*cs++ = 0;
	*cs++ = MI_LOAD_REGISTER_REG;
	*cs++ = RUNTIME;
	*cs++ = CS_GPR(START_TS);

	while (offset_in_page(cs) & 63)
		*cs++ = 0;
	jmp = cs;

	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */

	*cs++ = MI_LOAD_REGISTER_IMM;
	*cs++ = CS_GPR(NOW_TS) + 4;
	*cs++ = 0;
	*cs++ = MI_LOAD_REGISTER_REG;
	*cs++ = RUNTIME;
	*cs++ = CS_GPR(NOW_TS);

	/* delta = now - start; inverted to match COND_BBE */
	*cs++ = MI_MATH(4);
	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
	*cs++ = MI_MATH_SUB;
	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);

	/* Save delta for reading by COND_BBE */
	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
	*cs++ = CS_GPR(NOW_TS);
	*cs++ = addr + 4000;
	*cs++ = addr >> 32;

	/* Delay between SRM and COND_BBE to post the writes */
	for (int n = 0; n < 8; n++) {
		*cs++ = MI_STORE_DWORD_IMM;
		if (use_64b) {
			*cs++ = addr + 4064;
			*cs++ = addr >> 32;
		} else {
			*cs++ = 0;
			*cs++ = addr + 4064;
		}
		*cs++ = 0;
	}

	/* Break if delta > ns */
	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
	*cs++ = ~ns_to_ticks(i915, ns);
	*cs++ = addr + 4000;
	*cs++ = addr >> 32;

	/* Otherwise back to recalculating delta */
	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
	*cs++ = addr + offset_in_page(jmp);
	*cs++ = addr >> 32;

	munmap(map, 4096);
}

static struct drm_i915_gem_exec_object2
delay_create(int i915, uint32_t ctx,
	     const struct intel_execution_engine2 *e,
	     uint64_t target_ns)
{
	struct drm_i915_gem_exec_object2 obj = {
		.handle = batch_create(i915),
		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = to_user_pointer(&obj),
		.buffer_count = 1,
		.rsvd1 = ctx,
		.flags = e->flags,
	};

	obj.offset = obj.handle << 12;
	gem_execbuf(i915, &execbuf);
	gem_sync(i915, obj.handle);

	delay(i915, e, obj.handle, obj.offset, target_ns);

	obj.flags |= EXEC_OBJECT_PINNED;
	return obj;
}

static int __execbuf(int i915, struct drm_i915_gem_execbuffer2 *execbuf)
{
	int err;

	err = 0;
	if (ioctl(i915, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf)) {
		err = -errno;
		igt_assume(err);
	}

	errno = 0;
	return err;
}
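
/*
 * Build a long, serialised queue of individually short (~@delay ns) spin
 * batches, each declaring a write to a shared object, sized so that it takes
 * well over @target seconds to drain. The final submission on @ctx carries
 * an out-fence, returned via @fence, and the handle it writes is returned
 * to the caller.
 */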
static uint32_t
far_delay(int i915, unsigned long delay, unsigned int target,
	  const intel_ctx_t *ctx, const struct intel_execution_engine2 *e,
	  int *fence)
{
	struct drm_i915_gem_exec_object2 obj = delay_create(i915, 0, e, delay);
	struct drm_i915_gem_exec_object2 batch[2] = {
		{
			.handle = batch_create(i915),
			.flags = EXEC_OBJECT_WRITE,
		}
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = to_user_pointer(batch),
		.buffer_count = 2,
		.flags = e->flags,
	};
	intel_ctx_cfg_t cfg = ctx->cfg;
	uint32_t handle = gem_create(i915, 4096);
	unsigned long count, submit;

	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
	igt_require(gem_class_can_store_dword(i915, e->class));

	fcntl(i915, F_SETFL, fcntl(i915, F_GETFL) | O_NONBLOCK);

	submit = 3 * target;
	submit *= NSEC_PER_SEC;
	submit /= 2 * delay;

	if (gem_has_vm(i915))
		cfg.vm = gem_vm_create(i915);
	cfg.flags |= I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE;

	/*
	 * Submit a few long chains of individually short pieces of work
	 * against a shared object.
	 */
	for (count = 0; count < submit;) {
		const intel_ctx_t *tmp_ctx = intel_ctx_create(i915, &cfg);

		igt_assert(tmp_ctx->id);

		execbuf.rsvd1 = tmp_ctx->id;

		batch[1] = obj;
		while (__execbuf(i915, &execbuf) == 0)
			count++;

		intel_ctx_destroy(i915, tmp_ctx);
	}

	execbuf.flags |= I915_EXEC_FENCE_OUT;
	execbuf.rsvd1 = ctx->id;

	batch[1] = batch[0];
	batch[1].flags &= ~EXEC_OBJECT_WRITE;
	batch[0].handle = handle;
	assert(batch[0].flags & EXEC_OBJECT_WRITE);
	gem_execbuf_wr(i915, &execbuf);

	gem_close(i915, obj.handle);

	/* And pass the resulting end fence out. */
	*fence = execbuf.rsvd2 >> 32;

	return handle;
}

static void far_fence(int i915, int timeout, const intel_ctx_t *ctx,
		      const struct intel_execution_engine2 *e)
{
	int fence = -1;
	uint32_t handle = far_delay(i915, NSEC_PER_SEC / 250, timeout,
				    ctx, e, &fence);

	gem_close(i915, handle);

	igt_assert_eq(sync_fence_wait(fence, -1), 0);

	/*
	 * Many short pieces of work simulating independent clients working and
	 * presenting work to a consumer should not be interrupted by the
	 * watchdog.
	 *
	 * TODO/FIXME: Opens:
	 *
	 * 1)
	 * Missing fence error propagation means the consumer may fail to
	 * notice that the work hasn't actually been executed.
	 *
	 * There is also no clear agreement on whether error propagation is
	 * desired or not.
	 *
	 * 2)
	 * This assert could instead check that the fence status is in error,
	 * if it is accepted that this kind of workload may suddenly start
	 * failing. It depends on whether the desire is to test that the
	 * watchdog may break existing userspace, or whether it is acceptable
	 * to silently not execute workloads.
	 *
	 * 3)
	 * Implement a subtest which actually renders to a shared buffer so
	 * the watchdog's effect on the rendering result can also be
	 * demonstrated.
	 */
	igt_assert_eq(sync_fence_status(fence), 1);

	close(fence);
}

igt_main
{
	const struct intel_execution_engine2 *e;
	const intel_ctx_t *ctx;
	int i915 = -1;

	igt_fixture {
		const unsigned int timeout = 1;
		char *tmp;

		i915 = drm_open_driver_master(DRIVER_INTEL);

		gem_submission_print_method(i915);
		gem_scheduler_print_capability(i915);

		igt_require_gem(i915);

		tmp = __igt_params_get(i915, "request_timeout_ms");
		igt_skip_on_f(!tmp || !atoi(tmp),
			      "Request expiry not supported!\n");
		free(tmp);

		igt_params_save_and_set(i915, "request_timeout_ms", "%u",
					timeout * 1000);
		default_timeout_wait_s = timeout * 5;

		i915 = gem_reopen_driver(i915); /* Apply modparam. */

		ctx = intel_ctx_create_all_physical(i915);
	}

	igt_subtest_group {
		igt_subtest("default-physical")
			physical(i915, ctx);

		igt_subtest("default-virtual")
			virtual(i915, &ctx->cfg);
	}

	igt_subtest_with_dynamic("far-fence") {
		for_each_ctx_engine(i915, ctx, e) {
			igt_dynamic_f("%s", e->name)
				far_fence(i915, default_timeout_wait_s * 3,
					  ctx, e);
		}
	}

	igt_fixture {
		intel_ctx_destroy(i915, ctx);
		close(i915);
	}
}