/* * Copyright © 2016 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * */ #include #include #include #include #include #include "igt_core.h" #include "drmtest.h" #include "igt_device.h" #include "igt_dummyload.h" #include "igt_gt.h" #include "intel_chipset.h" #include "intel_reg.h" #include "ioctl_wrappers.h" #include "sw_sync.h" #include "igt_vgem.h" #include "i915/gem_engine_topology.h" #include "i915/gem_mman.h" /** * SECTION:igt_dummyload * @short_description: Library for submitting GPU workloads * @title: Dummyload * @include: igt.h * * A lot of igt testcases need some GPU workload to make sure a race window is * big enough. Unfortunately having a fixed amount of workload leads to * spurious test failures or overly long runtimes on some fast/slow platforms. * This library contains functionality to submit GPU workloads that should * consume exactly a specific amount of time. 
*/

#define LOCAL_I915_EXEC_BSD_SHIFT      (13)
#define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)

/* Every execbuf flag bit that selects the target engine */
#define ENGINE_MASK  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)

/* MI_ARB_CHECK: a point at which the command streamer may arbitrate */
#define MI_ARB_CHK (0x5 << 23)

/* The spin batch occupies one page; the busy loop starts at a fixed offset */
static const int BATCH_SIZE = 4096;
static const int LOOP_START_OFFSET = 64;

/* Registry of all live spinners so igt_terminate_spins() can end them */
static IGT_LIST_HEAD(spin_list);
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Build the self-referencing "spin" batch for @spin and submit it.
 *
 * The batch busy-loops by branching back to LOOP_START_OFFSET until the
 * dword at spin->condition is overwritten (see igt_spin_end()).  Depending
 * on @opts it additionally:
 *  - declares a dummy write dependency on opts->dependency,
 *  - emits a store to a separate poll page so the CPU can observe that the
 *    spinner has actually started executing (IGT_SPIN_POLL_RUN),
 *  - requests and merges output fences (IGT_SPIN_FENCE_OUT) and/or waits
 *    on an input fence (IGT_SPIN_FENCE_IN).
 *
 * The batch is submitted once per selected engine: every context engine
 * for ALL_ENGINES, otherwise just opts->engine.
 *
 * Returns: the merged output fence fd when IGT_SPIN_FENCE_OUT was
 * requested, otherwise -1.
 */
static int emit_recursive_batch(igt_spin_t *spin,
				int fd, const struct igt_spin_factory *opts)
{
#define SCRATCH 0
#define BATCH IGT_SPIN_BATCH
	const int gen = intel_gen(intel_get_drm_devid(fd));
	struct drm_i915_gem_relocation_entry relocs[3], *r;
	struct drm_i915_gem_execbuffer2 *execbuf;
	struct drm_i915_gem_exec_object2 *obj;
	unsigned int flags[GEM_MAX_ENGINES];
	unsigned int nengine;
	int fence_fd = -1;
	uint32_t *cs, *batch;
	int i;

	/* Collect the execbuf engine selector for every engine to spin on */
	nengine = 0;
	if (opts->engine == ALL_ENGINES) {
		struct intel_execution_engine2 *engine;

		for_each_context_engine(fd, opts->ctx, engine) {
			/* Polling needs MI_STORE_DWORD support on the engine */
			if (opts->flags & IGT_SPIN_POLL_RUN &&
			    !gem_class_can_store_dword(fd, engine->class))
				continue;

			flags[nengine++] = engine->flags;
		}
	} else {
		flags[nengine++] = opts->engine;
	}
	igt_require(nengine);

	memset(&spin->execbuf, 0, sizeof(spin->execbuf));
	execbuf = &spin->execbuf;
	memset(spin->obj, 0, sizeof(spin->obj));
	obj = spin->obj;
	memset(relocs, 0, sizeof(relocs));

	obj[BATCH].handle = gem_create(fd, BATCH_SIZE);
	batch = gem_mmap__device_coherent(fd, obj[BATCH].handle,
					  0, BATCH_SIZE, PROT_WRITE);
	gem_set_domain(fd, obj[BATCH].handle,
		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
	execbuf->buffer_count++;
	cs = batch;

	if (opts->dependency) {
		/* The dependency and the poll page share the SCRATCH slot */
		igt_assert(!(opts->flags & IGT_SPIN_POLL_RUN));

		r = &relocs[obj[BATCH].relocation_count++];

		/* dummy write to dependency */
		obj[SCRATCH].handle = opts->dependency;
		r->presumed_offset = 0;
		r->target_handle = obj[SCRATCH].handle;
		r->offset = sizeof(uint32_t) * 1020;
		r->delta = 0;
		r->read_domains = I915_GEM_DOMAIN_RENDER;
		r->write_domain = I915_GEM_DOMAIN_RENDER;

		execbuf->buffer_count++;
	} else if (opts->flags & IGT_SPIN_POLL_RUN) {
		r = &relocs[obj[BATCH].relocation_count++];

		igt_assert(!opts->dependency);

		/*
		 * NOTE(review): gen4/5 apparently need a secure (master)
		 * batch for this store to land — confirm against the
		 * MI_STORE_DWORD_IMM restrictions for those gens.
		 */
		if (gen == 4 || gen == 5) {
			execbuf->flags |= I915_EXEC_SECURE;
			igt_require(__igt_device_set_master(fd) == 0);
		}

		spin->poll_handle = gem_create(fd, 4096);
		obj[SCRATCH].handle = spin->poll_handle;

		/* Prefer a cacheable CPU mapping for the poll page */
		if (__gem_set_caching(fd, spin->poll_handle,
				      I915_CACHING_CACHED) == 0)
			spin->poll = gem_mmap__cpu(fd, spin->poll_handle,
						   0, 4096,
						   PROT_READ | PROT_WRITE);
		else
			spin->poll = gem_mmap__device_coherent(fd,
							       spin->poll_handle,
							       0, 4096,
							       PROT_READ | PROT_WRITE);

		igt_assert_eq(spin->poll[SPIN_POLL_START_IDX], 0);

		/* batch is first */
		r->presumed_offset = 4096;
		r->target_handle = obj[SCRATCH].handle;
		r->offset = sizeof(uint32_t) * 1;
		r->delta = sizeof(uint32_t) * SPIN_POLL_START_IDX;

		/* Gen-specific MI_STORE_DWORD_IMM writing 1 to the poll page */
		*cs++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);

		if (gen >= 8) {
			*cs++ = r->presumed_offset + r->delta;
			*cs++ = 0;
		} else if (gen >= 4) {
			*cs++ = 0;
			*cs++ = r->presumed_offset + r->delta;
			r->offset += sizeof(uint32_t);
		} else {
			/* Only one address dword emitted: shrink the cmd len */
			cs[-1]--;
			*cs++ = r->presumed_offset + r->delta;
		}

		*cs++ = 1;

		execbuf->buffer_count++;
	}

	spin->handle = obj[BATCH].handle;

	/* The preamble above must fit below the loop entry point */
	igt_assert_lt(cs - batch, LOOP_START_OFFSET / sizeof(*cs));
	spin->condition = batch + LOOP_START_OFFSET / sizeof(*cs);
	cs = spin->condition;

	/* Allow ourselves to be preempted */
	if (!(opts->flags & IGT_SPIN_NO_PREEMPTION))
		*cs++ = MI_ARB_CHK;
	if (opts->flags & IGT_SPIN_INVALID_CS)
		*cs++ = 0xdeadbeef;

	/* Pad with a few nops so that we do not completely hog the system.
	 *
	 * Part of the attraction of using a recursive batch is that it is
	 * hard on the system (executing the "function" call is apparently
	 * quite expensive). However, the GPU may hog the entire system for
	 * a few minutes, preventing even NMI. Quite why this is so is unclear,
	 * but presumably it relates to the PM_INTRMSK workaround on gen6/gen7.
	 * If we give the system a break by having the GPU execute a few nops
	 * between function calls, that appears enough to keep SNB out of
	 * trouble. See https://bugs.freedesktop.org/show_bug.cgi?id=102262
	 */
	if (!(opts->flags & IGT_SPIN_FAST))
		cs += 960;

	/*
	 * When using a cmdparser, the batch is copied into a read only location
	 * and validated. We are then unable to alter the executing batch,
	 * breaking the older *spin->condition = MI_BB_END termination.
	 * Instead we can use a conditional MI_BB_END here that looks at
	 * the user's copy of the batch and terminates when they modified it,
	 * no matter how they modify it (from either the GPU or CPU).
	 */
	if (gen >= 8) { /* arbitrary cutoff between ring/execlists submission */
		r = &relocs[obj[BATCH].relocation_count++];

		/*
		 * On Sandybridge+ the comparison is a strict greater-than:
		 * if the value at spin->condition is greater than BB_END,
		 * we loop back to the beginning.
		 * Beginning with Kabylake, we can select the comparison mode
		 * and loop back to the beginning if spin->condition != BB_END
		 * (using 5 << 12).
		 * For simplicity, we try to stick to a one-size fits all.
		 */
		spin->condition = batch + BATCH_SIZE / sizeof(*batch) - 2;
		spin->condition[0] = 0xffffffff;
		spin->condition[1] = 0xffffffff;

		r->presumed_offset = 0;
		r->target_handle = obj[BATCH].handle;
		r->offset = (cs + 2 - batch) * sizeof(*cs);
		r->read_domains = I915_GEM_DOMAIN_COMMAND;
		r->delta = (spin->condition - batch) * sizeof(*cs);

		*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
		*cs++ = MI_BATCH_BUFFER_END;
		*cs++ = r->delta;
		*cs++ = 0;
	}

	/* recurse */
	r = &relocs[obj[BATCH].relocation_count++];
	r->target_handle = obj[BATCH].handle;
	r->offset = (cs + 1 - batch) * sizeof(*cs);
	r->read_domains = I915_GEM_DOMAIN_COMMAND;
	r->delta = LOOP_START_OFFSET;
	if (gen >= 8) {
		*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*cs++ = r->delta;
		*cs++ = 0;
	} else if (gen >= 6) {
		*cs++ = MI_BATCH_BUFFER_START | 1 << 8;
		*cs++ = r->delta;
	} else {
		*cs++ = MI_BATCH_BUFFER_START | 2 << 6;
		if (gen < 4)
			r->delta |= 1; /* presumably the non-secure bit — TODO confirm */
		*cs = r->delta;
		cs++;
	}

	obj[BATCH].relocs_ptr = to_user_pointer(relocs);
	/* Skip obj[SCRATCH] when only the batch object was populated */
	execbuf->buffers_ptr =
		to_user_pointer(obj + (2 - execbuf->buffer_count));
	execbuf->rsvd1 = opts->ctx;

	if (opts->flags & IGT_SPIN_FENCE_OUT)
		execbuf->flags |= I915_EXEC_FENCE_OUT;

	if (opts->flags & IGT_SPIN_FENCE_IN) {
		execbuf->flags |= I915_EXEC_FENCE_IN;
		execbuf->rsvd2 = opts->fence;
	}

	/* Submit on every engine, merging any output fences into one fd */
	for (i = 0; i < nengine; i++) {
		execbuf->flags &= ~ENGINE_MASK;
		execbuf->flags |= flags[i];

		gem_execbuf_wr(fd, execbuf);

		if (opts->flags & IGT_SPIN_FENCE_OUT) {
			/* The out-fence fd is returned in the upper 32 bits */
			int _fd = execbuf->rsvd2 >> 32;

			igt_assert(_fd >= 0);
			if (fence_fd == -1) {
				fence_fd = _fd;
			} else {
				int old_fd = fence_fd;

				fence_fd = sync_fence_merge(old_fd, _fd);

				close(old_fd);
				close(_fd);
			}
			igt_assert(fence_fd >= 0);
		}
	}

	igt_assert_lt(cs - batch, BATCH_SIZE / sizeof(*cs));

	/* Make it easier for callers to resubmit. */
	for (i = 0; i < ARRAY_SIZE(spin->obj); i++) {
		spin->obj[i].relocation_count = 0;
		spin->obj[i].relocs_ptr = 0;
		spin->obj[i].flags = EXEC_OBJECT_PINNED;
	}

	/* Remember the dword igt_spin_reset() must restore to spin again */
	spin->cmd_precondition = *spin->condition;

	return fence_fd;
}

/* Allocate a spinner, emit its batch and register it on the global list */
static igt_spin_t *
spin_create(int fd, const struct igt_spin_factory *opts)
{
	igt_spin_t *spin;

	spin = calloc(1, sizeof(struct igt_spin));
	igt_assert(spin);

	spin->out_fence = emit_recursive_batch(spin, fd, opts);

	pthread_mutex_lock(&list_lock);
	igt_list_add(&spin->link, &spin_list);
	pthread_mutex_unlock(&list_lock);

	return spin;
}

/* As igt_spin_factory(), but without the sanity checks on @opts */
igt_spin_t *
__igt_spin_factory(int fd, const struct igt_spin_factory *opts)
{
	return spin_create(fd, opts);
}

/**
 * igt_spin_factory:
 * @fd: open i915 drm file descriptor
 * @opts: controlling options such as context, engine, dependencies etc
 *
 * Start a recursive batch on a ring. Immediately returns a #igt_spin_t that
 * contains the batch's handle that can be waited upon. The returned structure
 * must be passed to igt_spin_free() for post-processing.
 *
 * Returns:
 * Structure with helper internal state for igt_spin_free().
*/
igt_spin_t *
igt_spin_factory(int fd, const struct igt_spin_factory *opts)
{
	igt_spin_t *spin;

	igt_require_gem(fd);

	/* Verify the requested engine exists, and that it can service
	 * IGT_SPIN_POLL_RUN (needs MI_STORE_DWORD) when asked for. */
	if (opts->engine != ALL_ENGINES) {
		struct intel_execution_engine2 e;
		int class;

		if (!gem_context_lookup_engine(fd, opts->engine,
					       opts->ctx, &e)) {
			class = e.class;
		} else {
			gem_require_ring(fd, opts->engine);
			class = gem_execbuf_flags_to_engine_class(opts->engine);
		}

		if (opts->flags & IGT_SPIN_POLL_RUN)
			igt_require(gem_class_can_store_dword(fd, class));
	}

	spin = spin_create(fd, opts);

	/* Sanity: the spinner must be busy and its out-fence unsignaled */
	igt_assert(gem_bo_busy(fd, spin->handle));
	if (opts->flags & IGT_SPIN_FENCE_OUT) {
		struct pollfd pfd = { spin->out_fence, POLLIN };

		/* poll with zero timeout returning 0 == not yet signaled */
		igt_assert(poll(&pfd, 1, 0) == 0);
	}

	return spin;
}

/* Timer-expiry callback (SIGEV_THREAD): terminate the associated spinner */
static void notify(union sigval arg)
{
	igt_spin_t *spin = arg.sival_ptr;

	igt_spin_end(spin);
}

/**
 * igt_spin_set_timeout:
 * @spin: spin state from igt_spin_new()
 * @ns: amount of time in nanoseconds the batch continues to execute
 * before finishing.
 *
 * Specify a timeout. This ends the recursive batch associated with @spin after
 * the timeout has elapsed.
 */
void igt_spin_set_timeout(igt_spin_t *spin, int64_t ns)
{
	timer_t timer;
	struct sigevent sev;
	struct itimerspec its;

	igt_assert(ns > 0);
	if (!spin)
		return;

	/* Only one timeout may be armed per spinner */
	igt_assert(!spin->timer);

	/* Run notify() on a helper thread when the timer fires */
	memset(&sev, 0, sizeof(sev));
	sev.sigev_notify = SIGEV_THREAD;
	sev.sigev_value.sival_ptr = spin;
	sev.sigev_notify_function = notify;
	igt_assert(timer_create(CLOCK_MONOTONIC, &sev, &timer) == 0);
	igt_assert(timer);

	/* One-shot expiry @ns from now (it_interval left zero) */
	memset(&its, 0, sizeof(its));
	its.it_value.tv_sec = ns / NSEC_PER_SEC;
	its.it_value.tv_nsec = ns % NSEC_PER_SEC;
	igt_assert(timer_settime(timer, 0, &its, NULL) == 0);

	spin->timer = timer;
}

/**
 * igt_spin_reset:
 * @spin: spin state from igt_spin_new()
 *
 * Reset the state of spin, allowing its reuse.
*/
void igt_spin_reset(igt_spin_t *spin)
{
	if (igt_spin_has_poll(spin))
		spin->poll[SPIN_POLL_START_IDX] = 0;

	/* Restore the dword saved at creation so the batch loops again */
	*spin->condition = spin->cmd_precondition;
	__sync_synchronize();
}

/**
 * igt_spin_end:
 * @spin: spin state from igt_spin_new()
 *
 * End the spinner associated with @spin manually.
 */
void igt_spin_end(igt_spin_t *spin)
{
	if (!spin)
		return;

	/* Overwrite the condition dword so the batch stops looping */
	*spin->condition = MI_BATCH_BUFFER_END;
	__sync_synchronize();
}

/**
 * igt_spin_free:
 * @fd: open i915 drm file descriptor
 * @spin: spin state from igt_spin_new()
 *
 * This function does the necessary post-processing after starting a
 * spin with igt_spin_new() and then frees it.
 */
void igt_spin_free(int fd, igt_spin_t *spin)
{
	if (!spin)
		return;

	pthread_mutex_lock(&list_lock);
	igt_list_del(&spin->link);
	pthread_mutex_unlock(&list_lock);

	if (spin->timer)
		timer_delete(spin->timer);

	igt_spin_end(spin);
	/* condition points into the batch mapping; unmap from its
	 * page-aligned base */
	gem_munmap((void *)((unsigned long)spin->condition & (~4095UL)),
		   BATCH_SIZE);

	if (spin->poll) {
		gem_munmap(spin->poll, 4096);
		gem_close(fd, spin->poll_handle);
	}

	if (spin->handle)
		gem_close(fd, spin->handle);

	if (spin->out_fence >= 0)
		close(spin->out_fence);

	free(spin);
}

/* End every spinner still registered on the global list */
void igt_terminate_spins(void)
{
	struct igt_spin *iter;

	pthread_mutex_lock(&list_lock);
	igt_list_for_each_entry(iter, &spin_list, link)
		igt_spin_end(iter);
	pthread_mutex_unlock(&list_lock);
}

void igt_unshare_spins(void)
{
	struct igt_spin *it, *n;

	/* Disable the automatic termination on inherited spinners */
	igt_list_for_each_entry_safe(it, n, &spin_list, link)
		IGT_INIT_LIST_HEAD(&it->link);
	IGT_INIT_LIST_HEAD(&spin_list);
}

/* Import a write-fenced vgem bo into @fd; the returned handle stalls any
 * submission that depends on it until the fence is signaled. */
static uint32_t plug_vgem_handle(struct igt_cork *cork, int fd)
{
	struct vgem_bo bo;
	int dmabuf;
	uint32_t handle;

	cork->vgem.device = drm_open_driver(DRIVER_VGEM);
	igt_require(vgem_has_fences(cork->vgem.device));

	/* Smallest possible bo: 1x1, 4 bpp */
	bo.width = bo.height = 1;
	bo.bpp = 4;
	vgem_create(cork->vgem.device, &bo);
	cork->vgem.fence = vgem_fence_attach(cork->vgem.device, &bo,
					     VGEM_FENCE_WRITE);

	/* Share the fenced bo with @fd via dma-buf */
	dmabuf = prime_handle_to_fd(cork->vgem.device, bo.handle);
	handle = prime_fd_to_handle(fd, dmabuf);
	close(dmabuf);

	return handle;
}

/* Signal the vgem fence and drop the vgem device, releasing the cork */
static void unplug_vgem_handle(struct igt_cork *cork)
{
	vgem_fence_signal(cork->vgem.device, cork->vgem.fence);
	close(cork->vgem.device);
}

/* Create a sw_sync timeline and return an unsignaled fence fd on it */
static uint32_t plug_sync_fd(struct igt_cork *cork)
{
	int fence;

	igt_require_sw_sync();

	cork->sw_sync.timeline = sw_sync_timeline_create();
	fence = sw_sync_timeline_create_fence(cork->sw_sync.timeline, 1);

	return fence;
}

/* Advance the timeline so the plugged fence signals, then close it */
static void unplug_sync_fd(struct igt_cork *cork)
{
	sw_sync_timeline_inc(cork->sw_sync.timeline, 1);
	close(cork->sw_sync.timeline);
}

/**
 * igt_cork_plug:
 * @cork: structure that will be filled with the state of the cork bo.
 * Note: this has to match the corking method.
 * @fd: open drm file descriptor
 *
 * This function provides a mechanism to stall submission. It provides two
 * blocking methods:
 *
 * VGEM_BO.
 * Imports a vgem bo with a fence attached to it. This bo can be used as a
 * dependency during submission to stall execution until the fence is signaled.
 *
 * SW_SYNC:
 * Creates a timeline and then a fence on that timeline. The fence can be used
 * as an input fence to a request, the request will be stalled until the fence
 * is signaled.
 *
 * The parameters required to unblock the execution and to cleanup are stored in
 * the provided cork structure.
 *
 * Returns:
 * Handle of the imported BO / Sw sync fence FD.
 */
uint32_t igt_cork_plug(struct igt_cork *cork, int fd)
{
	/*
	 * NOTE(review): cork->fd appears to alias the per-method state
	 * (vgem.device / sw_sync.timeline), so the plug_* helpers mark the
	 * cork as plugged as a side effect — confirm against the igt_cork
	 * definition.
	 */
	igt_assert(cork->fd == -1);

	switch (cork->type) {
	case CORK_SYNC_FD:
		return plug_sync_fd(cork);

	case CORK_VGEM_HANDLE:
		return plug_vgem_handle(cork, fd);

	default:
		igt_assert_f(0, "Invalid cork type!\n");
		return 0;
	}
}

/**
 * igt_cork_unplug:
 * @cork: cork state from igt_cork_plug()
 *
 * This function unblocks the execution by signaling the fence attached to the
 * imported bo and does the necessary post-processing.
 *
 * NOTE: the handle returned by igt_cork_plug is not closed during this phase.
*/
void igt_cork_unplug(struct igt_cork *cork)
{
	/* A cork must have been plugged before it can be released */
	igt_assert(cork->fd != -1);

	if (cork->type == CORK_VGEM_HANDLE)
		unplug_vgem_handle(cork);
	else if (cork->type == CORK_SYNC_FD)
		unplug_sync_fd(cork);
	else
		igt_assert_f(0, "Invalid cork type!\n");

	cork->fd = -1; /* Reset cork */
}