/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

/* NOTE(review): the system header names were lost in extraction (the file
 * contained five bare "#include" directives with the <...> names stripped).
 * Reconstructed below from the symbols this file uses — calloc/free
 * (stdlib.h), memset (string.h), close (unistd.h), sigevent/SIGEV_THREAD
 * (signal.h), timer_create/itimerspec (time.h), poll/pollfd (sys/poll.h),
 * PROT_* (sys/mman.h), pthread_mutex_* (pthread.h). Verify against upstream.
 */
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <time.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/poll.h>

#include "igt_core.h"
#include "drmtest.h"
#include "igt_device.h"
#include "igt_dummyload.h"
#include "igt_gt.h"
#include "intel_chipset.h"
#include "intel_reg.h"
#include "ioctl_wrappers.h"
#include "sw_sync.h"
#include "igt_vgem.h"
#include "i915/gem_mman.h"

/**
 * SECTION:igt_dummyload
 * @short_description: Library for submitting GPU workloads
 * @title: Dummyload
 * @include: igt.h
 *
 * A lot of igt testcases need some GPU workload to make sure a race window is
 * big enough. Unfortunately having a fixed amount of workload leads to
 * spurious test failures or overly long runtimes on some fast/slow platforms.
 * This library contains functionality to submit GPU workloads that should
 * consume exactly a specific amount of time.
*/ #define LOCAL_I915_EXEC_BSD_SHIFT (13) #define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT) #define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK) #define MI_ARB_CHK (0x5 << 23) static const int BATCH_SIZE = 4096; static IGT_LIST(spin_list); static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER; static void fill_reloc(struct drm_i915_gem_relocation_entry *reloc, uint32_t gem_handle, uint32_t offset, uint32_t read_domains, uint32_t write_domains) { reloc->target_handle = gem_handle; reloc->offset = offset * sizeof(uint32_t); reloc->read_domains = read_domains; reloc->write_domain = write_domains; } static int emit_recursive_batch(igt_spin_t *spin, int fd, const struct igt_spin_factory *opts) { #define SCRATCH 0 #define BATCH 1 const int gen = intel_gen(intel_get_drm_devid(fd)); struct drm_i915_gem_relocation_entry relocs[2], *r; struct drm_i915_gem_execbuffer2 *execbuf; struct drm_i915_gem_exec_object2 *obj; unsigned int engines[16]; unsigned int nengine; int fence_fd = -1; uint32_t *batch, *batch_start; int i; nengine = 0; if (opts->engine == ALL_ENGINES) { unsigned int engine; for_each_physical_engine(fd, engine) { if (opts->flags & IGT_SPIN_POLL_RUN && !gem_can_store_dword(fd, engine)) continue; engines[nengine++] = engine; } } else { engines[nengine++] = opts->engine; } igt_require(nengine); memset(&spin->execbuf, 0, sizeof(spin->execbuf)); execbuf = &spin->execbuf; memset(spin->obj, 0, sizeof(spin->obj)); obj = spin->obj; memset(relocs, 0, sizeof(relocs)); obj[BATCH].handle = gem_create(fd, BATCH_SIZE); batch = __gem_mmap__wc(fd, obj[BATCH].handle, 0, BATCH_SIZE, PROT_WRITE); if (!batch) batch = gem_mmap__gtt(fd, obj[BATCH].handle, BATCH_SIZE, PROT_WRITE); gem_set_domain(fd, obj[BATCH].handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); execbuf->buffer_count++; batch_start = batch; if (opts->dependency) { igt_assert(!(opts->flags & IGT_SPIN_POLL_RUN)); /* dummy write to dependency */ obj[SCRATCH].handle = opts->dependency; 
fill_reloc(&relocs[obj[BATCH].relocation_count++], opts->dependency, 1020, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); execbuf->buffer_count++; } else if (opts->flags & IGT_SPIN_POLL_RUN) { unsigned int offset; igt_assert(!opts->dependency); if (gen == 4 || gen == 5) { execbuf->flags |= I915_EXEC_SECURE; igt_require(__igt_device_set_master(fd) == 0); } spin->poll_handle = gem_create(fd, 4096); if (__gem_set_caching(fd, spin->poll_handle, I915_CACHING_CACHED) == 0) spin->running = gem_mmap__cpu(fd, spin->poll_handle, 0, 4096, PROT_READ | PROT_WRITE); else spin->running = gem_mmap__wc(fd, spin->poll_handle, 0, 4096, PROT_READ | PROT_WRITE); igt_assert_eq(*spin->running, 0); *batch++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); if (gen >= 8) { offset = 1; *batch++ = 0; *batch++ = 0; } else if (gen >= 4) { offset = 2; *batch++ = 0; *batch++ = 0; } else { offset = 1; batch[-1]--; *batch++ = 0; } *batch++ = 1; obj[SCRATCH].handle = spin->poll_handle; fill_reloc(&relocs[obj[BATCH].relocation_count++], spin->poll_handle, offset, 0, 0); execbuf->buffer_count++; } spin->batch = batch = batch_start + 64 / sizeof(*batch); spin->handle = obj[BATCH].handle; /* Allow ourselves to be preempted */ if (!(opts->flags & IGT_SPIN_NO_PREEMPTION)) *batch++ = MI_ARB_CHK; /* Pad with a few nops so that we do not completely hog the system. * * Part of the attraction of using a recursive batch is that it is * hard on the system (executing the "function" call is apparently * quite expensive). However, the GPU may hog the entire system for * a few minutes, preventing even NMI. Quite why this is so is unclear, * but presumably it relates to the PM_INTRMSK workaround on gen6/gen7. * If we give the system a break by having the GPU execute a few nops * between function calls, that appears enough to keep SNB out of * trouble. 
See https://bugs.freedesktop.org/show_bug.cgi?id=102262 */ if (!(opts->flags & IGT_SPIN_FAST)) batch += 1000; /* recurse */ r = &relocs[obj[BATCH].relocation_count++]; r->target_handle = obj[BATCH].handle; r->offset = (batch + 1 - batch_start) * sizeof(*batch); r->read_domains = I915_GEM_DOMAIN_COMMAND; r->delta = 64; if (gen >= 8) { *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; *batch++ = r->delta; *batch++ = 0; } else if (gen >= 6) { *batch++ = MI_BATCH_BUFFER_START | 1 << 8; *batch++ = r->delta; } else { *batch++ = MI_BATCH_BUFFER_START | 2 << 6; if (gen < 4) r->delta |= 1; *batch = r->delta; batch++; } obj[BATCH].relocs_ptr = to_user_pointer(relocs); execbuf->buffers_ptr = to_user_pointer(obj + (2 - execbuf->buffer_count)); execbuf->rsvd1 = opts->ctx; if (opts->flags & IGT_SPIN_FENCE_OUT) execbuf->flags |= I915_EXEC_FENCE_OUT; for (i = 0; i < nengine; i++) { execbuf->flags &= ~ENGINE_MASK; execbuf->flags |= engines[i]; gem_execbuf_wr(fd, execbuf); if (opts->flags & IGT_SPIN_FENCE_OUT) { int _fd = execbuf->rsvd2 >> 32; igt_assert(_fd >= 0); if (fence_fd == -1) { fence_fd = _fd; } else { int old_fd = fence_fd; fence_fd = sync_fence_merge(old_fd, _fd); close(old_fd); close(_fd); } igt_assert(fence_fd >= 0); } } /* Make it easier for callers to resubmit. 
*/ obj[BATCH].relocation_count = 0; obj[BATCH].relocs_ptr = 0; obj[SCRATCH].flags = EXEC_OBJECT_PINNED; obj[BATCH].flags = EXEC_OBJECT_PINNED; return fence_fd; } static igt_spin_t * spin_batch_create(int fd, const struct igt_spin_factory *opts) { igt_spin_t *spin; spin = calloc(1, sizeof(struct igt_spin)); igt_assert(spin); spin->out_fence = emit_recursive_batch(spin, fd, opts); pthread_mutex_lock(&list_lock); igt_list_add(&spin->link, &spin_list); pthread_mutex_unlock(&list_lock); return spin; } igt_spin_t * __igt_spin_batch_factory(int fd, const struct igt_spin_factory *opts) { return spin_batch_create(fd, opts); } /** * igt_spin_batch_factory: * @fd: open i915 drm file descriptor * @opts: controlling options such as context, engine, dependencies etc * * Start a recursive batch on a ring. Immediately returns a #igt_spin_t that * contains the batch's handle that can be waited upon. The returned structure * must be passed to igt_spin_batch_free() for post-processing. * * Returns: * Structure with helper internal state for igt_spin_batch_free(). */ igt_spin_t * igt_spin_batch_factory(int fd, const struct igt_spin_factory *opts) { igt_spin_t *spin; igt_require_gem(fd); if (opts->engine != ALL_ENGINES) { gem_require_ring(fd, opts->engine); if (opts->flags & IGT_SPIN_POLL_RUN) igt_require(gem_can_store_dword(fd, opts->engine)); } spin = spin_batch_create(fd, opts); igt_assert(gem_bo_busy(fd, spin->handle)); if (opts->flags & IGT_SPIN_FENCE_OUT) { struct pollfd pfd = { spin->out_fence, POLLIN }; igt_assert(poll(&pfd, 1, 0) == 0); } return spin; } static void notify(union sigval arg) { igt_spin_t *spin = arg.sival_ptr; igt_spin_batch_end(spin); } /** * igt_spin_batch_set_timeout: * @spin: spin batch state from igt_spin_batch_new() * @ns: amount of time in nanoseconds the batch continues to execute * before finishing. * * Specify a timeout. This ends the recursive batch associated with @spin after * the timeout has elapsed. 
*/ void igt_spin_batch_set_timeout(igt_spin_t *spin, int64_t ns) { timer_t timer; struct sigevent sev; struct itimerspec its; igt_assert(ns > 0); if (!spin) return; igt_assert(!spin->timer); memset(&sev, 0, sizeof(sev)); sev.sigev_notify = SIGEV_THREAD; sev.sigev_value.sival_ptr = spin; sev.sigev_notify_function = notify; igt_assert(timer_create(CLOCK_MONOTONIC, &sev, &timer) == 0); igt_assert(timer); memset(&its, 0, sizeof(its)); its.it_value.tv_sec = ns / NSEC_PER_SEC; its.it_value.tv_nsec = ns % NSEC_PER_SEC; igt_assert(timer_settime(timer, 0, &its, NULL) == 0); spin->timer = timer; } /** * igt_spin_batch_end: * @spin: spin batch state from igt_spin_batch_new() * * End the recursive batch associated with @spin manually. */ void igt_spin_batch_end(igt_spin_t *spin) { if (!spin) return; *spin->batch = MI_BATCH_BUFFER_END; __sync_synchronize(); } /** * igt_spin_batch_free: * @fd: open i915 drm file descriptor * @spin: spin batch state from igt_spin_batch_new() * * This function does the necessary post-processing after starting a recursive * batch with igt_spin_batch_new(). 
*/ void igt_spin_batch_free(int fd, igt_spin_t *spin) { if (!spin) return; pthread_mutex_lock(&list_lock); igt_list_del(&spin->link); pthread_mutex_unlock(&list_lock); if (spin->timer) timer_delete(spin->timer); igt_spin_batch_end(spin); gem_munmap((void *)((unsigned long)spin->batch & (~4095UL)), BATCH_SIZE); if (spin->running) { gem_munmap(spin->running, 4096); gem_close(fd, spin->poll_handle); } gem_close(fd, spin->handle); if (spin->out_fence >= 0) close(spin->out_fence); free(spin); } void igt_terminate_spin_batches(void) { struct igt_spin *iter; pthread_mutex_lock(&list_lock); igt_list_for_each(iter, &spin_list, link) igt_spin_batch_end(iter); pthread_mutex_unlock(&list_lock); } static uint32_t plug_vgem_handle(struct igt_cork *cork, int fd) { struct vgem_bo bo; int dmabuf; uint32_t handle; cork->vgem.device = drm_open_driver(DRIVER_VGEM); igt_require(vgem_has_fences(cork->vgem.device)); bo.width = bo.height = 1; bo.bpp = 4; vgem_create(cork->vgem.device, &bo); cork->vgem.fence = vgem_fence_attach(cork->vgem.device, &bo, VGEM_FENCE_WRITE); dmabuf = prime_handle_to_fd(cork->vgem.device, bo.handle); handle = prime_fd_to_handle(fd, dmabuf); close(dmabuf); return handle; } static void unplug_vgem_handle(struct igt_cork *cork) { vgem_fence_signal(cork->vgem.device, cork->vgem.fence); close(cork->vgem.device); } static uint32_t plug_sync_fd(struct igt_cork *cork) { int fence; igt_require_sw_sync(); cork->sw_sync.timeline = sw_sync_timeline_create(); fence = sw_sync_timeline_create_fence(cork->sw_sync.timeline, 1); return fence; } static void unplug_sync_fd(struct igt_cork *cork) { sw_sync_timeline_inc(cork->sw_sync.timeline, 1); close(cork->sw_sync.timeline); } /** * igt_cork_plug: * @fd: open drm file descriptor * @method: method to utilize for corking. * @cork: structure that will be filled with the state of the cork bo. * Note: this has to match the corking method. * * This function provides a mechanism to stall submission. 
It provides two * blocking methods: * * VGEM_BO. * Imports a vgem bo with a fence attached to it. This bo can be used as a * dependency during submission to stall execution until the fence is signaled. * * SW_SYNC: * Creates a timeline and then a fence on that timeline. The fence can be used * as an input fence to a request, the request will be stalled until the fence * is signaled. * * The parameters required to unblock the execution and to cleanup are stored in * the provided cork structure. * * Returns: * Handle of the imported BO / Sw sync fence FD. */ uint32_t igt_cork_plug(struct igt_cork *cork, int fd) { igt_assert(cork->fd == -1); switch (cork->type) { case CORK_SYNC_FD: return plug_sync_fd(cork); case CORK_VGEM_HANDLE: return plug_vgem_handle(cork, fd); default: igt_assert_f(0, "Invalid cork type!\n"); return 0; } } /** * igt_cork_unplug: * @method: method to utilize for corking. * @cork: cork state from igt_cork_plug() * * This function unblocks the execution by signaling the fence attached to the * imported bo and does the necessary post-processing. * * NOTE: the handle returned by igt_cork_plug is not closed during this phase. */ void igt_cork_unplug(struct igt_cork *cork) { igt_assert(cork->fd != -1); switch (cork->type) { case CORK_SYNC_FD: unplug_sync_fd(cork); break; case CORK_VGEM_HANDLE: unplug_vgem_handle(cork); break; default: igt_assert_f(0, "Invalid cork type!\n"); } cork->fd = -1; /* Reset cork */ }