/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Chris Wilson
 *
 */

#include "igt.h"
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <math.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <time.h>
#include "drm.h"

#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)

#define LOCAL_I915_EXEC_BSD_SHIFT      (13)
#define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)

#define ENGINE_FLAGS  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)

static double elapsed(const struct timespec *start, const struct timespec *end)
{
	return ((end->tv_sec - start->tv_sec) +
		(end->tv_nsec - start->tv_nsec)*1e-9);
}

/*
 * Submit an empty batch to the given ring in a tight loop for @timeout
 * seconds; report the number of submissions in *out and return the elapsed
 * time, so the caller can compute the per-cycle execution latency.
 */
static double nop_on_ring(int fd, uint32_t handle, unsigned ring_id,
			  int timeout, unsigned long *out)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	struct timespec start, now;
	unsigned long count;

	memset(&obj, 0, sizeof(obj));
	obj.handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)&obj;
	execbuf.buffer_count = 1;
	execbuf.flags = ring_id;
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
	if (__gem_execbuf(fd, &execbuf)) {
		/* Fall back for kernels without NO_RELOC/HANDLE_LUT */
		execbuf.flags = ring_id;
		gem_execbuf(fd, &execbuf);
	}
	gem_sync(fd, handle);
	intel_detect_and_clear_missed_interrupts(fd);

	count = 0;
	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		for (int loop = 0; loop < 1024; loop++)
			gem_execbuf(fd, &execbuf);

		count += 1024;
		clock_gettime(CLOCK_MONOTONIC, &now);
	} while (elapsed(&start, &now) < timeout);
	gem_sync(fd, handle);
	clock_gettime(CLOCK_MONOTONIC, &now);

	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);

	*out = count;
	return elapsed(&start, &now);
}

static void single(int fd, uint32_t handle,
		   unsigned ring_id, const char *ring_name)
{
	double time;
	unsigned long count;

	gem_require_ring(fd, ring_id);

	time = nop_on_ring(fd, handle, ring_id, 20, &count);
	igt_info("%s: %'lu cycles: %.3fus\n",
		 ring_name, count, time*1e6 / count);
}

/*
 * Skip the default placeholder engine (0) and, when the device exposes two
 * BSD rings, the generic I915_EXEC_BSD alias that would double-count them.
 */
static bool ignore_engine(int fd, unsigned engine)
{
	if (engine == 0)
		return true;

	if (gem_has_bsd2(fd) && engine == I915_EXEC_BSD)
		return true;

	return false;
}
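/*
 * Submit empty batches to every usable engine in round-robin order and check
 * that the aggregate throughput is limited by the slowest engine rather than
 * by serialised submission, i.e. that dispatch really runs in parallel.
 */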
static void all(int fd, uint32_t handle, int timeout)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	struct timespec start, now;
	unsigned engines[16];
	unsigned nengine;
	unsigned engine;
	unsigned long count;
	double time, max = 0, min = HUGE_VAL, sum = 0;
	const char *name;

	nengine = 0;
	for_each_engine(fd, engine) {
		if (ignore_engine(fd, engine))
			continue;

		time = nop_on_ring(fd, handle, engine, 1, &count) / count;
		if (time > max) {
			name = e__->name;
			max = time;
		}
		if (time < min)
			min = time;
		sum += time;

		engines[nengine++] = engine;
	}
	igt_require(nengine);
	igt_info("Maximum execution latency on %s, %.3fus, total %.3fus per cycle\n",
		 name, max*1e6, sum*1e6);

	memset(&obj, 0, sizeof(obj));
	obj.handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)&obj;
	execbuf.buffer_count = 1;
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = 0;
		gem_execbuf(fd, &execbuf);
	}
	gem_sync(fd, handle);
	intel_detect_and_clear_missed_interrupts(fd);

	count = 0;
	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		for (int loop = 0; loop < 1024; loop++) {
			for (int n = 0; n < nengine; n++) {
				execbuf.flags &= ~ENGINE_FLAGS;
				execbuf.flags |= engines[n];
				gem_execbuf(fd, &execbuf);
			}
		}
		count += nengine * 1024;
		clock_gettime(CLOCK_MONOTONIC, &now);
	} while (elapsed(&start, &now) < timeout); /* Hang detection ~120s */
	gem_sync(fd, handle);
	clock_gettime(CLOCK_MONOTONIC, &now);

	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);

	time = elapsed(&start, &now) / count;
	igt_info("All (%d engines): %'lu cycles, average %.3fus per cycle\n",
		 nengine, count, 1e6*time);

	/* The rate limiting step is how fast the slowest engine can retire
	 * its queue of requests; if we wait upon a full ring, all dispatch
	 * is frozen. So in general we cannot go faster than the slowest
	 * engine, but we should equally not go any slower.
	 */
	igt_assert_f(time < max + 10*min/9, /* ensure parallel execution */
		     "Average time (%.3fus) exceeds expectation for parallel execution (min %.3fus, max %.3fus; limit set at %.3fus)\n",
		     1e6*time, 1e6*min, 1e6*max, 1e6*(max + 10*min/9));
}

igt_main
{
	const struct intel_execution_engine *e;
	uint32_t handle = 0;
	int device = -1;

	igt_fixture {
		const uint32_t bbe = MI_BATCH_BUFFER_END;

		device = drm_open_driver(DRIVER_INTEL);
		handle = gem_create(device, 4096);
		gem_write(device, handle, 0, &bbe, sizeof(bbe));

		igt_fork_hang_detector(device);
	}

	igt_subtest("basic")
		all(device, handle, 10);

	for (e = intel_execution_engines; e->name; e++)
		igt_subtest_f("%s", e->name)
			single(device, handle, e->exec_id | e->flags, e->name);

	igt_subtest("all")
		all(device, handle, 150);

	igt_fixture {
		igt_stop_hang_detector();
		gem_close(device, handle);
		close(device);
	}
}