/*
 * Copyright © 2021 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "config.h"

#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include "i915/gem.h"
#include "igt.h"
#include "igt_params.h"
#include "sw_sync.h"

#define EWATCHDOG EINTR

static struct drm_i915_query_engine_info *__engines__;

static int __i915_query(int fd, struct drm_i915_query *q)
{
	if (igt_ioctl(fd, DRM_IOCTL_I915_QUERY, q))
		return -errno;

	return 0;
}

static int
__i915_query_items(int fd, struct drm_i915_query_item *items, uint32_t n_items)
{
	struct drm_i915_query q = {
		.num_items = n_items,
		.items_ptr = to_user_pointer(items),
	};

	return __i915_query(fd, &q);
}

#define i915_query_items(fd, items, n_items) do { \
		igt_assert_eq(__i915_query_items(fd, items, n_items), 0); \
		errno = 0; \
	} while (0)

static unsigned int default_timeout_wait_s;
static const unsigned int watchdog_us = 500 * 1000;

static unsigned int
wait_timeout(int i915, igt_spin_t **spin, unsigned int num_engines,
	     unsigned int wait_us, unsigned int expect)
{
	unsigned int count_idle = 0, count_fence = 0, count_started = 0, i;
	bool started[num_engines];

	memset(started, 0, sizeof(started));

	/* Wait for every spinner to start executing on its engine. */
	while (count_started < num_engines) {
		for (i = 0; i < num_engines; i++) {
			if (started[i])
				continue;

			if (igt_spin_has_started(spin[i])) {
				started[i] = true;
				count_started++;
			}
		}
	}

	/* Poll until all requests go idle or the timeout expires. */
	igt_until_timeout(DIV_ROUND_UP(wait_us, USEC_PER_SEC)) {
		usleep(watchdog_us / 2);

		for (i = 0, count_idle = 0; i < num_engines; i++) {
			if (!gem_bo_busy(i915, spin[i]->handle))
				count_idle++;
		}

		for (i = 0, count_fence = 0; i < num_engines; i++) {
			if (sync_fence_status(spin[i]->out_fence))
				count_fence++;
		}

		if (count_idle == num_engines)
			break;
	}

	if (count_idle < expect) {
		for (i = 0; i < num_engines; i++) {
			if (gem_bo_busy(i915, spin[i]->handle))
				igt_warn("Request %u/%u not cancelled!\n",
					 i + 1, num_engines);
		}
	}

	if (count_fence < expect) {
		for (i = 0; i < num_engines; i++) {
			if (!sync_fence_status(spin[i]->out_fence))
				igt_warn("Fence %u/%u not timed out!\n",
					 i + 1, num_engines);
		}
	}

	igt_assert_eq(count_idle, count_fence);

	return count_fence;
}

static unsigned int spin_flags(void)
{
	return IGT_SPIN_POLL_RUN | IGT_SPIN_FENCE_OUT;
}

static void physical(int i915)
{
	const unsigned int wait_us = default_timeout_wait_s * USEC_PER_SEC;
	unsigned int num_engines = __engines__->num_engines, i, count;
	const struct intel_execution_engine2 *e;
	unsigned int expect = num_engines;
	igt_spin_t *spin[num_engines];

	i = 0;
	__for_each_physical_engine(i915, e) {
		spin[i] = igt_spin_new(i915, .engine = e->flags,
				       .flags = spin_flags());
		i++;
	}

	count = wait_timeout(i915, spin, num_engines, wait_us, expect);

	for (i = 0; i < num_engines; i++)
		igt_spin_free(i915, spin[i]);

	igt_assert_eq(count, expect);
}

static struct i915_engine_class_instance *
list_engines(unsigned int class, unsigned int *out)
{
	struct i915_engine_class_instance *ci;
	unsigned int count = 0, size = 64, i;

	ci = malloc(size * sizeof(*ci));
	igt_assert(ci);

	for (i = 0; i < __engines__->num_engines; i++) {
		struct drm_i915_engine_info *engine =
			(struct drm_i915_engine_info *)&__engines__->engines[i];

		if (class != engine->engine.engine_class)
			continue;

		if (count == size) {
			size *= 2;
			ci = realloc(ci, size * sizeof(*ci));
			igt_assert(ci);
		}

		ci[count++] = (struct i915_engine_class_instance){
			.engine_class = class,
			.engine_instance = engine->engine.engine_instance,
		};
	}

	if (!count) {
		free(ci);
		ci = NULL;
	}

	*out = count;
	return ci;
}

static size_t sizeof_load_balance(int count)
{
	return offsetof(struct i915_context_engines_load_balance,
			engines[count]);
}

static size_t sizeof_param_engines(int count)
{
	return offsetof(struct i915_context_param_engines,
			engines[count]);
}

#define alloca0(sz) ({ size_t sz__ = (sz); memset(alloca(sz__), 0, sz__); })

static int __set_load_balancer(int i915, uint32_t ctx,
			       const struct i915_engine_class_instance *ci,
			       unsigned int count,
			       void *ext)
{
	struct i915_context_engines_load_balance *balancer =
		alloca0(sizeof_load_balance(count));
	struct i915_context_param_engines *engines =
		alloca0(sizeof_param_engines(count + 1));
	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof_param_engines(count + 1),
		.value = to_user_pointer(engines)
	};

	balancer->base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
	balancer->base.next_extension = to_user_pointer(ext);

	igt_assert(count);
	balancer->num_siblings = count;
	memcpy(balancer->engines, ci, count * sizeof(*ci));

	engines->extensions = to_user_pointer(balancer);
	engines->engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
	engines->engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
	memcpy(engines->engines + 1, ci, count * sizeof(*ci));

	return __gem_context_set_param(i915, &p);
}

static void set_load_balancer(int i915, uint32_t ctx,
			      const struct i915_engine_class_instance *ci,
			      unsigned int count,
			      void *ext)
{
	igt_assert_eq(__set_load_balancer(i915, ctx, ci, count, ext), 0);
}

static void ctx_set_vm(int i915, uint32_t ctx, uint32_t vm)
{
	struct drm_i915_gem_context_param arg = {
		.param = I915_CONTEXT_PARAM_VM,
		.ctx_id = ctx,
		.value = vm,
	};

	gem_context_set_param(i915, &arg);
}

static uint32_t ctx_get_vm(int i915, uint32_t ctx)
{
	struct drm_i915_gem_context_param arg;

	memset(&arg, 0, sizeof(arg));
	arg.param = I915_CONTEXT_PARAM_VM;
	arg.ctx_id = ctx;
	gem_context_get_param(i915, &arg);
	igt_assert(arg.value);

	return arg.value;
}

static void virtual(int i915)
{
	const unsigned int wait_us = default_timeout_wait_s * USEC_PER_SEC;
	unsigned int num_engines = __engines__->num_engines, i, count;
	igt_spin_t *spin[num_engines];
	unsigned int expect = num_engines;
	uint32_t ctx[num_engines];
	uint32_t vm;

	igt_require(gem_has_execlists(i915));

	igt_debug("%u virtual engines\n", num_engines);
	igt_require(num_engines);

	i = 0;
	for (int class = 0; class < 32; class++) {
		struct i915_engine_class_instance *ci;

		ci = list_engines(class, &count);
		if (!ci)
			continue;

		for (int pass = 0; pass < count; pass++) {
			igt_assert(sizeof(*ci) == sizeof(int));
			igt_permute_array(ci, count, igt_exchange_int);

			igt_assert(i < num_engines);
			ctx[i] = gem_context_create(i915);
			if (!i)
				vm = ctx_get_vm(i915, ctx[i]);
			else
				ctx_set_vm(i915, ctx[i], vm);
			set_load_balancer(i915, ctx[i], ci, count, NULL);
			spin[i] = igt_spin_new(i915, .ctx = ctx[i],
					       .flags = spin_flags());
			i++;
		}

		free(ci);
	}

	count = wait_timeout(i915, spin, num_engines, wait_us, expect);

	for (i = 0; i < num_engines && spin[i]; i++) {
		gem_context_destroy(i915, ctx[i]);
		igt_spin_free(i915, spin[i]);
	}

	igt_assert_eq(count, expect);
}

#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))

#define MI_MATH(x)			MI_INSTR(0x1a, (x) - 1)
#define MI_MATH_INSTR(opcode, op1, op2)	((opcode) << 20 | (op1) << 10 | (op2))
/* Opcodes for MI_MATH_INSTR */
#define   MI_MATH_NOOP			MI_MATH_INSTR(0x000, 0x0, 0x0)
#define   MI_MATH_LOAD(op1, op2)	MI_MATH_INSTR(0x080, op1, op2)
#define   MI_MATH_LOADINV(op1, op2)	MI_MATH_INSTR(0x480, op1, op2)
#define   MI_MATH_LOAD0(op1)		MI_MATH_INSTR(0x081, op1)
#define   MI_MATH_LOAD1(op1)		MI_MATH_INSTR(0x481, op1)
#define   MI_MATH_ADD			MI_MATH_INSTR(0x100, 0x0, 0x0)
#define   MI_MATH_SUB			MI_MATH_INSTR(0x101, 0x0, 0x0)
#define   MI_MATH_AND			MI_MATH_INSTR(0x102, 0x0, 0x0)
#define   MI_MATH_OR			MI_MATH_INSTR(0x103, 0x0, 0x0)
#define   MI_MATH_XOR			MI_MATH_INSTR(0x104, 0x0, 0x0)
#define   MI_MATH_STORE(op1, op2)	MI_MATH_INSTR(0x180, op1, op2)
#define   MI_MATH_STOREINV(op1, op2)	MI_MATH_INSTR(0x580, op1, op2)
/* Registers used as operands in MI_MATH_INSTR */
#define   MI_MATH_REG(x)		(x)
#define   MI_MATH_REG_SRCA		0x20
#define   MI_MATH_REG_SRCB		0x21
#define   MI_MATH_REG_ACCU		0x31
#define   MI_MATH_REG_ZF		0x32
#define   MI_MATH_REG_CF		0x33

#define MI_LOAD_REGISTER_REG		MI_INSTR(0x2A, 1)

static unsigned int offset_in_page(void *addr)
{
	return (uintptr_t)addr & 4095;
}

static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
{
	return (x + y - 1) / y;
}

static int read_timestamp_frequency(int i915)
{
	int value = 0;
	drm_i915_getparam_t gp = {
		.value = &value,
		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
	};

	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);

	return value;
}

static uint64_t ns_to_ticks(int i915, uint64_t ns)
{
	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
				  NSEC_PER_SEC);
}

static uint32_t __batch_create(int i915, uint32_t offset)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	uint32_t handle;

	handle = gem_create(i915, ALIGN(offset + 4, 4096));
	gem_write(i915, handle, offset, &bbe, sizeof(bbe));

	return handle;
}

static uint32_t batch_create(int i915)
{
	return __batch_create(i915, 0);
}

static void delay(int i915,
		  const struct intel_execution_engine2 *e,
		  uint32_t handle,
		  uint64_t addr,
		  uint64_t ns)
{
	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
	const uint32_t base = gem_engine_mmio_base(i915, e->name);
#define CS_GPR(x) (base + 0x600 + 8 * (x))
#define RUNTIME (base + 0x3a8)
	enum { START_TS, NOW_TS };
	uint32_t *map, *cs, *jmp;

	igt_require(base);

	/* Loop until CTX_TIMESTAMP - initial > @ns */

	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);

	*cs++ = MI_LOAD_REGISTER_IMM;
	*cs++ = CS_GPR(START_TS) + 4;
	*cs++ = 0;
	*cs++ = MI_LOAD_REGISTER_REG;
	*cs++ = RUNTIME;
	*cs++ = CS_GPR(START_TS);

	while (offset_in_page(cs) & 63)
		*cs++ = 0;
	jmp = cs;

	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */

	*cs++ = MI_LOAD_REGISTER_IMM;
	*cs++ = CS_GPR(NOW_TS) + 4;
	*cs++ = 0;
	*cs++ = MI_LOAD_REGISTER_REG;
	*cs++ = RUNTIME;
	*cs++ = CS_GPR(NOW_TS);

	/* delta = now - start; inverted to match COND_BBE */
	*cs++ = MI_MATH(4);
	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
	*cs++ = MI_MATH_SUB;
	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);

	/* Save delta for reading by COND_BBE */
	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
	*cs++ = CS_GPR(NOW_TS);
	*cs++ = addr + 4000;
	*cs++ = addr >> 32;

	/* Delay between SRM and COND_BBE to post the writes */
	for (int n = 0; n < 8; n++) {
		*cs++ = MI_STORE_DWORD_IMM;
		if (use_64b) {
			*cs++ = addr + 4064;
			*cs++ = addr >> 32;
		} else {
			*cs++ = 0;
			*cs++ = addr + 4064;
		}
		*cs++ = 0;
	}

	/* Break if delta > ns */
	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
	*cs++ = ~ns_to_ticks(i915, ns);
	*cs++ = addr + 4000;
	*cs++ = addr >> 32;

	/* Otherwise back to recalculating delta */
	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
	*cs++ = addr + offset_in_page(jmp);
	*cs++ = addr >> 32;

	munmap(map, 4096);
}

static struct drm_i915_gem_exec_object2
delay_create(int i915, uint32_t ctx,
	     const struct intel_execution_engine2 *e,
	     uint64_t target_ns)
{
	struct drm_i915_gem_exec_object2 obj = {
		.handle = batch_create(i915),
		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = to_user_pointer(&obj),
		.buffer_count = 1,
		.rsvd1 = ctx,
		.flags = e->flags,
	};

	obj.offset = obj.handle << 12;
	gem_execbuf(i915, &execbuf);
	gem_sync(i915, obj.handle);

	delay(i915, e, obj.handle, obj.offset, target_ns);

	obj.flags |= EXEC_OBJECT_PINNED;

	return obj;
}

static uint32_t vm_clone(int i915)
{
	uint32_t ctx = 0;

	__gem_context_clone(i915, 0,
			    I915_CONTEXT_CLONE_VM |
			    I915_CONTEXT_CLONE_ENGINES,
			    I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
			    &ctx);

	return ctx;
}

static int __execbuf(int i915, struct drm_i915_gem_execbuffer2 *execbuf)
{
	int err;

	err = 0;
	if (ioctl(i915, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf)) {
		err = -errno;
		igt_assume(err);
	}

	errno = 0;
	return err;
}

static uint32_t
far_delay(int i915, unsigned long delay, unsigned int target,
	  const struct intel_execution_engine2 *e, int *fence)
{
	struct drm_i915_gem_exec_object2 obj = delay_create(i915, 0, e, delay);
	struct drm_i915_gem_exec_object2 batch[2] = {
		{
			.handle = batch_create(i915),
			.flags = EXEC_OBJECT_WRITE,
		}
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = to_user_pointer(batch),
		.buffer_count = 2,
		.flags = e->flags,
	};
	uint32_t handle = gem_create(i915, 4096);
	unsigned long count, submit;

	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
	igt_require(gem_class_can_store_dword(i915, e->class));

	fcntl(i915, F_SETFL, fcntl(i915, F_GETFL) | O_NONBLOCK);

	submit = 3 * target;
	submit *= NSEC_PER_SEC;
	submit /= 2 * delay;

	/*
	 * Submit a few long chains of individually short pieces of work
	 * against a shared object.
	 */
	for (count = 0; count < submit;) {
		execbuf.rsvd1 = vm_clone(i915);
		if (!execbuf.rsvd1)
			break;

		batch[1] = obj;

		while (__execbuf(i915, &execbuf) == 0)
			count++;

		gem_context_destroy(i915, execbuf.rsvd1);
	}

	execbuf.flags |= I915_EXEC_FENCE_OUT;
	execbuf.rsvd1 = 0;
	batch[1] = batch[0];
	batch[1].flags &= ~EXEC_OBJECT_WRITE;
	batch[0].handle = handle;
	assert(batch[0].flags & EXEC_OBJECT_WRITE);
	gem_execbuf_wr(i915, &execbuf);

	gem_close(i915, obj.handle);

	/* And pass the resulting end fence out. */
	*fence = execbuf.rsvd2 >> 32;

	return handle;
}

static void far_fence(int i915, int timeout,
		      const struct intel_execution_engine2 *e)
{
	int fence = -1;
	uint32_t handle = far_delay(i915, NSEC_PER_SEC / 250, timeout,
				    e, &fence);

	gem_close(i915, handle);

	igt_assert_eq(sync_fence_wait(fence, -1), 0);

	/*
	 * Many short pieces of work simulating independent clients working
	 * and presenting work to a consumer should not be interrupted by
	 * the watchdog.
	 *
	 * TODO/FIXME: Opens:
	 *
	 * 1)
	 * Missing fence error propagation means the consumer may fail to
	 * notice that the work has not actually been executed.
	 *
	 * There is also no clear agreement on whether error propagation is
	 * desired or not.
	 *
	 * 2)
	 * This assert could instead check that the fence status is in error,
	 * if it is accepted that this kind of workload should suddenly start
	 * failing. It depends on whether the desire is to test that the
	 * watchdog could break existing userspace, or whether it is
	 * acceptable to silently not execute workloads.
	 *
	 * 3)
	 * Implement a subtest which actually renders to a shared buffer, so
	 * that the watchdog's effect on the rendering result can also be
	 * demonstrated.
	 */
	igt_assert_eq(sync_fence_status(fence), 1);

	close(fence);
}

igt_main
{
	const struct intel_execution_engine2 *e;
	int i915 = -1;

	igt_fixture {
		struct drm_i915_query_item item;
		char *tmp;

		i915 = drm_open_driver_master(DRIVER_INTEL);

		gem_submission_print_method(i915);
		gem_scheduler_print_capability(i915);

		igt_require_gem(i915);

		tmp = __igt_params_get(i915, "request_timeout_ms");
		if (tmp) {
			const unsigned int timeout = 1;

			igt_params_save_and_set(i915, "request_timeout_ms",
						"%u", timeout * 1000);
			default_timeout_wait_s = timeout * 5;
			free(tmp);
		} else {
			default_timeout_wait_s = 12;
		}

		i915 = gem_reopen_driver(i915); /* Apply modparam. */

		__engines__ = malloc(4096);
		igt_assert(__engines__);
		memset(__engines__, 0, 4096);

		memset(&item, 0, sizeof(item));
		item.query_id = DRM_I915_QUERY_ENGINE_INFO;
		item.data_ptr = to_user_pointer(__engines__);
		item.length = 4096;
		i915_query_items(i915, &item, 1);
		igt_assert(item.length >= 0);
		igt_assert(item.length <= 4096);
		igt_assert(__engines__->num_engines > 0);
	}

	igt_subtest_group {
		igt_subtest("default-physical")
			physical(i915);

		igt_subtest("default-virtual")
			virtual(i915);
	}

	igt_subtest_with_dynamic("far-fence") {
		__for_each_physical_engine(i915, e) {
			igt_dynamic_f("%s", e->name)
				far_fence(i915,
					  default_timeout_wait_s * 3, e);
		}
	}

	igt_fixture {
		close(i915);
	}
}