author | Arkadiusz Hiler <arkadiusz.hiler@intel.com> | 2018-10-18 14:06:42 +0300
---|---|---
committer | Arkadiusz Hiler <arkadiusz.hiler@intel.com> | 2018-10-23 10:55:51 +0300
commit | 741bf7064c467df725c14cc0b3b8b50436f9ee09 (patch) |
tree | 0ad6fb217dca79a8f1175fb289979b574222fefa /tests/i915/gem_sync.c |
parent | 78619fde4008424c472906041edb1d204e014f7c (diff) |
tests: Introduce i915 directory
We can already move all the tests with distinct prefixes: gem_, gen3_
and i915_.
pm_ and drv_ tests will follow in batches, so that we can make the
adjustments in the reporting/filtering layer of the CI system.
v2: Fix test-list.txt generation with meson
v3: Fix docs build (Petri)
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Petri Latvala <petri.latvala@intel.com>
Cc: Martin Peres <martin.peres@linux.intel.com>
Signed-off-by: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
Reviewed-by: Petri Latvala <petri.latvala@intel.com>
Tested-by: Petri Latvala <petri.latvala@intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'tests/i915/gem_sync.c')
-rw-r--r-- | tests/i915/gem_sync.c | 1304 |
1 file changed, 1304 insertions, 0 deletions
diff --git a/tests/i915/gem_sync.c b/tests/i915/gem_sync.c new file mode 100644 index 00000000..fb209977 --- /dev/null +++ b/tests/i915/gem_sync.c @@ -0,0 +1,1304 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <time.h> +#include <pthread.h> + +#include "igt.h" +#include "igt_sysfs.h" + +#define LOCAL_I915_EXEC_NO_RELOC (1<<11) +#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12) + +#define LOCAL_I915_EXEC_BSD_SHIFT (13) +#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT) + +#define MAX_PRIO LOCAL_I915_CONTEXT_MAX_USER_PRIORITY +#define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY + +#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK) + +IGT_TEST_DESCRIPTION("Basic check of ring<->ring write synchronisation."); + +/* + * Testcase: Basic check of sync + * + * Extremely efficient at catching missed irqs + */ + +static double gettime(void) +{ + static clockid_t clock = -1; + struct timespec ts; + + /* Stay on the same clock for consistency. 
*/ + if (clock != (clockid_t)-1) { + if (clock_gettime(clock, &ts)) + goto error; + goto out; + } + +#ifdef CLOCK_MONOTONIC_RAW + if (!clock_gettime(clock = CLOCK_MONOTONIC_RAW, &ts)) + goto out; +#endif +#ifdef CLOCK_MONOTONIC_COARSE + if (!clock_gettime(clock = CLOCK_MONOTONIC_COARSE, &ts)) + goto out; +#endif + if (!clock_gettime(clock = CLOCK_MONOTONIC, &ts)) + goto out; +error: + igt_warn("Could not read monotonic time: %s\n", + strerror(errno)); + igt_assert(0); + return 0; + +out: + return ts.tv_sec + 1e-9*ts.tv_nsec; +} + +static void +sync_ring(int fd, unsigned ring, int num_children, int timeout) +{ + unsigned engines[16]; + const char *names[16]; + int num_engines = 0; + + if (ring == ALL_ENGINES) { + for_each_physical_engine(fd, ring) { + names[num_engines] = e__->name; + engines[num_engines++] = ring; + if (num_engines == ARRAY_SIZE(engines)) + break; + } + + num_children *= num_engines; + } else { + gem_require_ring(fd, ring); + names[num_engines] = NULL; + engines[num_engines++] = ring; + } + + intel_detect_and_clear_missed_interrupts(fd); + igt_fork(child, num_children) { + const uint32_t bbe = MI_BATCH_BUFFER_END; + struct drm_i915_gem_exec_object2 object; + struct drm_i915_gem_execbuffer2 execbuf; + double start, elapsed; + unsigned long cycles; + + memset(&object, 0, sizeof(object)); + object.handle = gem_create(fd, 4096); + gem_write(fd, object.handle, 0, &bbe, sizeof(bbe)); + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = to_user_pointer(&object); + execbuf.buffer_count = 1; + execbuf.flags = engines[child % num_engines]; + gem_execbuf(fd, &execbuf); + gem_sync(fd, object.handle); + + start = gettime(); + cycles = 0; + do { + do { + gem_execbuf(fd, &execbuf); + gem_sync(fd, object.handle); + } while (++cycles & 1023); + } while ((elapsed = gettime() - start) < timeout); + igt_info("%s%sompleted %ld cycles: %.3f us\n", + names[child % num_engines] ?: "", + names[child % num_engines] ? 
" c" : "C", + cycles, elapsed*1e6/cycles); + + gem_close(fd, object.handle); + } + igt_waitchildren_timeout(timeout+10, NULL); + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); +} + +static void +idle_ring(int fd, unsigned ring, int timeout) +{ + const uint32_t bbe = MI_BATCH_BUFFER_END; + struct drm_i915_gem_exec_object2 object; + struct drm_i915_gem_execbuffer2 execbuf; + double start, elapsed; + unsigned long cycles; + + gem_require_ring(fd, ring); + + memset(&object, 0, sizeof(object)); + object.handle = gem_create(fd, 4096); + gem_write(fd, object.handle, 0, &bbe, sizeof(bbe)); + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = to_user_pointer(&object); + execbuf.buffer_count = 1; + execbuf.flags = ring; + gem_execbuf(fd, &execbuf); + gem_sync(fd, object.handle); + + intel_detect_and_clear_missed_interrupts(fd); + start = gettime(); + cycles = 0; + do { + do { + gem_execbuf(fd, &execbuf); + gem_quiescent_gpu(fd); + } while (++cycles & 1023); + } while ((elapsed = gettime() - start) < timeout); + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); + + igt_info("Completed %ld cycles: %.3f us\n", + cycles, elapsed*1e6/cycles); + + gem_close(fd, object.handle); +} + +static void +wakeup_ring(int fd, unsigned ring, int timeout, int wlen) +{ + unsigned engines[16]; + const char *names[16]; + int num_engines = 0; + + if (ring == ALL_ENGINES) { + for_each_physical_engine(fd, ring) { + if (!gem_can_store_dword(fd, ring)) + continue; + + names[num_engines] = e__->name; + engines[num_engines++] = ring; + if (num_engines == ARRAY_SIZE(engines)) + break; + } + igt_require(num_engines); + } else { + gem_require_ring(fd, ring); + igt_require(gem_can_store_dword(fd, ring)); + names[num_engines] = NULL; + engines[num_engines++] = ring; + } + + intel_detect_and_clear_missed_interrupts(fd); + igt_fork(child, num_engines) { + const uint32_t bbe = MI_BATCH_BUFFER_END; + struct drm_i915_gem_exec_object2 object; + struct drm_i915_gem_execbuffer2 execbuf; + double end, this, elapsed, now, baseline; + unsigned long cycles; + uint32_t cmd; + igt_spin_t *spin; + + memset(&object, 0, sizeof(object)); + object.handle = gem_create(fd, 4096); + gem_write(fd, object.handle, 0, &bbe, sizeof(bbe)); + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = to_user_pointer(&object); + execbuf.buffer_count = 1; + execbuf.flags = engines[child % num_engines]; + + spin = __igt_spin_batch_new(fd, + .engine = execbuf.flags, + .flags = (IGT_SPIN_POLL_RUN | + IGT_SPIN_FAST)); + igt_assert(spin->running); + cmd = *spin->batch; + + gem_execbuf(fd, &execbuf); + + igt_spin_batch_end(spin); + gem_sync(fd, object.handle); + + for (int warmup = 0; warmup <= 1; warmup++) { + end = gettime() + timeout/10.; + elapsed = 0; + cycles = 0; + do { + *spin->batch = cmd; + *spin->running = 0; + gem_execbuf(fd, &spin->execbuf); + while (!READ_ONCE(*spin->running)) + ; + + this = gettime(); + igt_spin_batch_end(spin); + gem_sync(fd, spin->handle); + now = gettime(); + + elapsed += now - this; + cycles++; + } while (now < end); + baseline = elapsed / cycles; + } + igt_info("%s%saseline %ld cycles: %.3f us\n", + names[child % num_engines] ?: "", + names[child % num_engines] ? 
" b" : "B", + cycles, elapsed*1e6/cycles); + + end = gettime() + timeout; + elapsed = 0; + cycles = 0; + do { + *spin->batch = cmd; + *spin->running = 0; + gem_execbuf(fd, &spin->execbuf); + while (!READ_ONCE(*spin->running)) + ; + + for (int n = 0; n < wlen; n++) + gem_execbuf(fd, &execbuf); + + this = gettime(); + igt_spin_batch_end(spin); + gem_sync(fd, object.handle); + now = gettime(); + + elapsed += now - this; + cycles++; + } while (now < end); + elapsed -= cycles * baseline; + + igt_info("%s%sompleted %ld cycles: %.3f + %.3f us\n", + names[child % num_engines] ?: "", + names[child % num_engines] ? " c" : "C", + cycles, 1e6*baseline, elapsed*1e6/cycles); + + igt_spin_batch_free(fd, spin); + gem_close(fd, object.handle); + } + igt_waitchildren_timeout(2*timeout, NULL); + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); +} + +static void active_ring(int fd, unsigned ring, int timeout) +{ + unsigned engines[16]; + const char *names[16]; + int num_engines = 0; + + if (ring == ALL_ENGINES) { + for_each_physical_engine(fd, ring) { + if (!gem_can_store_dword(fd, ring)) + continue; + + names[num_engines] = e__->name; + engines[num_engines++] = ring; + if (num_engines == ARRAY_SIZE(engines)) + break; + } + igt_require(num_engines); + } else { + gem_require_ring(fd, ring); + igt_require(gem_can_store_dword(fd, ring)); + names[num_engines] = NULL; + engines[num_engines++] = ring; + } + + intel_detect_and_clear_missed_interrupts(fd); + igt_fork(child, num_engines) { + double start, end, elapsed; + unsigned long cycles; + igt_spin_t *spin[2]; + uint32_t cmd; + + spin[0] = __igt_spin_batch_new(fd, + .engine = ring, + .flags = IGT_SPIN_FAST); + cmd = *spin[0]->batch; + + spin[1] = __igt_spin_batch_new(fd, + .engine = ring, + .flags = IGT_SPIN_FAST); + igt_assert(*spin[1]->batch == cmd); + + start = gettime(); + end = start + timeout; + cycles = 0; + do { + for (int loop = 0; loop < 1024; loop++) { + igt_spin_t *s = spin[loop & 1]; + + igt_spin_batch_end(s); + gem_sync(fd, s->handle); + + *s->batch = cmd; + gem_execbuf(fd, &s->execbuf); + } + cycles += 1024; + } while ((elapsed = gettime()) < end); + igt_spin_batch_free(fd, spin[1]); + igt_spin_batch_free(fd, spin[0]); + + igt_info("%s%sompleted %ld cycles: %.3f us\n", + names[child % num_engines] ?: "", + names[child % num_engines] ? 
" c" : "C", + cycles, (elapsed - start)*1e6/cycles); + } + igt_waitchildren_timeout(2*timeout, NULL); + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); +} + +static void +active_wakeup_ring(int fd, unsigned ring, int timeout, int wlen) +{ + unsigned engines[16]; + const char *names[16]; + int num_engines = 0; + + if (ring == ALL_ENGINES) { + for_each_physical_engine(fd, ring) { + if (!gem_can_store_dword(fd, ring)) + continue; + + names[num_engines] = e__->name; + engines[num_engines++] = ring; + if (num_engines == ARRAY_SIZE(engines)) + break; + } + igt_require(num_engines); + } else { + gem_require_ring(fd, ring); + igt_require(gem_can_store_dword(fd, ring)); + names[num_engines] = NULL; + engines[num_engines++] = ring; + } + + intel_detect_and_clear_missed_interrupts(fd); + igt_fork(child, num_engines) { + const uint32_t bbe = MI_BATCH_BUFFER_END; + struct drm_i915_gem_exec_object2 object; + struct drm_i915_gem_execbuffer2 execbuf; + double end, this, elapsed, now, baseline; + unsigned long cycles; + igt_spin_t *spin[2]; + uint32_t cmd; + + memset(&object, 0, sizeof(object)); + object.handle = gem_create(fd, 4096); + gem_write(fd, object.handle, 0, &bbe, sizeof(bbe)); + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = to_user_pointer(&object); + execbuf.buffer_count = 1; + execbuf.flags = engines[child % num_engines]; + + spin[0] = __igt_spin_batch_new(fd, + .engine = execbuf.flags, + .flags = (IGT_SPIN_POLL_RUN | + IGT_SPIN_FAST)); + igt_assert(spin[0]->running); + cmd = *spin[0]->batch; + + spin[1] = __igt_spin_batch_new(fd, + .engine = execbuf.flags, + .flags = (IGT_SPIN_POLL_RUN | + IGT_SPIN_FAST)); + + gem_execbuf(fd, &execbuf); + + igt_spin_batch_end(spin[1]); + igt_spin_batch_end(spin[0]); + gem_sync(fd, object.handle); + + for (int warmup = 0; warmup <= 1; warmup++) { + *spin[0]->batch = cmd; + *spin[0]->running = 0; + gem_execbuf(fd, &spin[0]->execbuf); + + end = gettime() + timeout/10.; + elapsed = 0; + cycles = 0; + do { + while (!READ_ONCE(*spin[0]->running)) + ; + + *spin[1]->batch = cmd; + *spin[1]->running = 0; + gem_execbuf(fd, &spin[1]->execbuf); + + this = gettime(); + igt_spin_batch_end(spin[0]); + gem_sync(fd, spin[0]->handle); + now = gettime(); + + elapsed += now - this; + cycles++; + igt_swap(spin[0], spin[1]); + } while (now < end); + igt_spin_batch_end(spin[0]); + baseline = elapsed / cycles; + } + igt_info("%s%saseline %ld cycles: %.3f us\n", + names[child % num_engines] ?: "", + names[child % num_engines] ? " b" : "B", + cycles, elapsed*1e6/cycles); + + *spin[0]->batch = cmd; + *spin[0]->running = 0; + gem_execbuf(fd, &spin[0]->execbuf); + + end = gettime() + timeout; + elapsed = 0; + cycles = 0; + do { + while (!READ_ONCE(*spin[0]->running)) + ; + + for (int n = 0; n < wlen; n++) + gem_execbuf(fd, &execbuf); + + *spin[1]->batch = cmd; + *spin[1]->running = 0; + gem_execbuf(fd, &spin[1]->execbuf); + + this = gettime(); + igt_spin_batch_end(spin[0]); + gem_sync(fd, object.handle); + now = gettime(); + + elapsed += now - this; + cycles++; + igt_swap(spin[0], spin[1]); + } while (now < end); + igt_spin_batch_end(spin[0]); + elapsed -= cycles * baseline; + + igt_info("%s%sompleted %ld cycles: %.3f + %.3f us\n", + names[child % num_engines] ?: "", + names[child % num_engines] ? 
" c" : "C", + cycles, 1e6*baseline, elapsed*1e6/cycles); + + igt_spin_batch_free(fd, spin[1]); + igt_spin_batch_free(fd, spin[0]); + gem_close(fd, object.handle); + } + igt_waitchildren_timeout(2*timeout, NULL); + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); +} + +static void +store_ring(int fd, unsigned ring, int num_children, int timeout) +{ + const int gen = intel_gen(intel_get_drm_devid(fd)); + unsigned engines[16]; + const char *names[16]; + int num_engines = 0; + + if (ring == ALL_ENGINES) { + for_each_physical_engine(fd, ring) { + if (!gem_can_store_dword(fd, ring)) + continue; + + names[num_engines] = e__->name; + engines[num_engines++] = ring; + if (num_engines == ARRAY_SIZE(engines)) + break; + } + + num_children *= num_engines; + } else { + gem_require_ring(fd, ring); + igt_require(gem_can_store_dword(fd, ring)); + names[num_engines] = NULL; + engines[num_engines++] = ring; + } + + intel_detect_and_clear_missed_interrupts(fd); + igt_fork(child, num_children) { + const uint32_t bbe = MI_BATCH_BUFFER_END; + struct drm_i915_gem_exec_object2 object[2]; + struct drm_i915_gem_relocation_entry reloc[1024]; + struct drm_i915_gem_execbuffer2 execbuf; + double start, elapsed; + unsigned long cycles; + uint32_t *batch, *b; + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = to_user_pointer(object); + execbuf.flags = engines[child % num_engines]; + execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC; + execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT; + if (gen < 6) + execbuf.flags |= I915_EXEC_SECURE; + + memset(object, 0, sizeof(object)); + object[0].handle = gem_create(fd, 4096); + gem_write(fd, object[0].handle, 0, &bbe, sizeof(bbe)); + execbuf.buffer_count = 1; + gem_execbuf(fd, &execbuf); + + object[0].flags |= EXEC_OBJECT_WRITE; + object[1].handle = gem_create(fd, 20*1024); + + object[1].relocs_ptr = to_user_pointer(reloc); + object[1].relocation_count = 1024; + + batch = gem_mmap__cpu(fd, object[1].handle, 0, 20*1024, + PROT_WRITE | PROT_READ); + gem_set_domain(fd, object[1].handle, + I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); + + memset(reloc, 0, sizeof(reloc)); + b = batch; + for (int i = 0; i < 1024; i++) { + uint64_t offset; + + reloc[i].presumed_offset = object[0].offset; + reloc[i].offset = (b - batch + 1) * sizeof(*batch); + reloc[i].delta = i * sizeof(uint32_t); + reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION; + reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION; + + offset = object[0].offset + reloc[i].delta; + *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); + if (gen >= 8) { + *b++ = offset; + *b++ = offset >> 32; + } else if (gen >= 4) { + *b++ = 0; + *b++ = offset; + reloc[i].offset += sizeof(*batch); + } else { + b[-1] -= 1; + *b++ = offset; + } + *b++ = i; + } + *b++ = MI_BATCH_BUFFER_END; + igt_assert((b - batch)*sizeof(uint32_t) < 20*1024); + munmap(batch, 20*1024); + execbuf.buffer_count = 2; + gem_execbuf(fd, &execbuf); + gem_sync(fd, object[1].handle); + + start = gettime(); + cycles = 0; + do { + do { + gem_execbuf(fd, &execbuf); + gem_sync(fd, object[1].handle); + } while (++cycles & 1023); + } while ((elapsed = gettime() - start) < timeout); + igt_info("%s%sompleted %ld cycles: %.3f us\n", + names[child % num_engines] ?: "", + names[child % num_engines] ? 
" c" : "C", + cycles, elapsed*1e6/cycles); + + gem_close(fd, object[1].handle); + gem_close(fd, object[0].handle); + } + igt_waitchildren_timeout(timeout+10, NULL); + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); +} + +static void +switch_ring(int fd, unsigned ring, int num_children, int timeout) +{ + const int gen = intel_gen(intel_get_drm_devid(fd)); + unsigned engines[16]; + const char *names[16]; + int num_engines = 0; + + gem_require_contexts(fd); + + if (ring == ALL_ENGINES) { + for_each_physical_engine(fd, ring) { + if (!gem_can_store_dword(fd, ring)) + continue; + + names[num_engines] = e__->name; + engines[num_engines++] = ring; + if (num_engines == ARRAY_SIZE(engines)) + break; + } + + num_children *= num_engines; + } else { + gem_require_ring(fd, ring); + igt_require(gem_can_store_dword(fd, ring)); + names[num_engines] = NULL; + engines[num_engines++] = ring; + } + + intel_detect_and_clear_missed_interrupts(fd); + igt_fork(child, num_children) { + struct context { + struct drm_i915_gem_exec_object2 object[2]; + struct drm_i915_gem_relocation_entry reloc[1024]; + struct drm_i915_gem_execbuffer2 execbuf; + } contexts[2]; + double start, elapsed; + unsigned long cycles; + + for (int i = 0; i < ARRAY_SIZE(contexts); i++) { + const uint32_t bbe = MI_BATCH_BUFFER_END; + const uint32_t sz = 32 << 10; + struct context *c = &contexts[i]; + uint32_t *batch, *b; + + memset(&c->execbuf, 0, sizeof(c->execbuf)); + c->execbuf.buffers_ptr = to_user_pointer(c->object); + c->execbuf.flags = engines[child % num_engines]; + c->execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC; + c->execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT; + if (gen < 6) + c->execbuf.flags |= I915_EXEC_SECURE; + c->execbuf.rsvd1 = gem_context_create(fd); + + memset(c->object, 0, sizeof(c->object)); + c->object[0].handle = gem_create(fd, 4096); + gem_write(fd, c->object[0].handle, 0, &bbe, sizeof(bbe)); + c->execbuf.buffer_count = 1; + gem_execbuf(fd, &c->execbuf); + + c->object[0].flags |= EXEC_OBJECT_WRITE; + c->object[1].handle = gem_create(fd, sz); + + c->object[1].relocs_ptr = to_user_pointer(c->reloc); + c->object[1].relocation_count = 1024; + + batch = gem_mmap__cpu(fd, c->object[1].handle, 0, sz, + PROT_WRITE | PROT_READ); + gem_set_domain(fd, c->object[1].handle, + I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); + + memset(c->reloc, 0, sizeof(c->reloc)); + b = batch; + for (int r = 0; r < 1024; r++) { + uint64_t offset; + + c->reloc[r].presumed_offset = c->object[0].offset; + c->reloc[r].offset = (b - batch + 1) * sizeof(*batch); + c->reloc[r].delta = r * sizeof(uint32_t); + c->reloc[r].read_domains = I915_GEM_DOMAIN_INSTRUCTION; + c->reloc[r].write_domain = I915_GEM_DOMAIN_INSTRUCTION; + + offset = c->object[0].offset + c->reloc[r].delta; + *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 
1 << 22 : 0); + if (gen >= 8) { + *b++ = offset; + *b++ = offset >> 32; + } else if (gen >= 4) { + *b++ = 0; + *b++ = offset; + c->reloc[r].offset += sizeof(*batch); + } else { + b[-1] -= 1; + *b++ = offset; + } + *b++ = r; + *b++ = 0x5 << 23; + } + *b++ = MI_BATCH_BUFFER_END; + igt_assert((b - batch)*sizeof(uint32_t) < sz); + munmap(batch, sz); + c->execbuf.buffer_count = 2; + gem_execbuf(fd, &c->execbuf); + gem_sync(fd, c->object[1].handle); + } + + cycles = 0; + elapsed = 0; + start = gettime(); + do { + do { + double this; + + gem_execbuf(fd, &contexts[0].execbuf); + gem_execbuf(fd, &contexts[1].execbuf); + + this = gettime(); + gem_sync(fd, contexts[1].object[1].handle); + elapsed += gettime() - this; + + gem_sync(fd, contexts[0].object[1].handle); + } while (++cycles & 1023); + } while ((gettime() - start) < timeout); + igt_info("%s%sompleted %ld cycles: %.3f us\n", + names[child % num_engines] ?: "", + names[child % num_engines] ? " c" : "C", + cycles, elapsed*1e6/cycles); + + for (int i = 0; i < ARRAY_SIZE(contexts); i++) { + gem_close(fd, contexts[i].object[1].handle); + gem_close(fd, contexts[i].object[0].handle); + gem_context_destroy(fd, contexts[i].execbuf.rsvd1); + } + } + igt_waitchildren_timeout(timeout+10, NULL); + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); +} + +static void xchg(void *array, unsigned i, unsigned j) +{ + uint32_t *u32 = array; + uint32_t tmp = u32[i]; + u32[i] = u32[j]; + u32[j] = tmp; +} + +struct waiter { + pthread_t thread; + pthread_mutex_t mutex; + pthread_cond_t cond; + + int ready; + volatile int *done; + + int fd; + struct drm_i915_gem_exec_object2 object; + uint32_t handles[64]; +}; + +static void *waiter(void *arg) +{ + struct waiter *w = arg; + + do { + pthread_mutex_lock(&w->mutex); + w->ready = 0; + pthread_cond_signal(&w->cond); + while (!w->ready) + pthread_cond_wait(&w->cond, &w->mutex); + pthread_mutex_unlock(&w->mutex); + if (*w->done < 0) + return NULL; + + gem_sync(w->fd, w->object.handle); + for (int n = 0; n < ARRAY_SIZE(w->handles); n++) + gem_sync(w->fd, w->handles[n]); + } while (1); +} + +static void +__store_many(int fd, unsigned ring, int timeout, unsigned long *cycles) +{ + const int gen = intel_gen(intel_get_drm_devid(fd)); + const uint32_t bbe = MI_BATCH_BUFFER_END; + struct drm_i915_gem_exec_object2 object[2]; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_relocation_entry reloc[1024]; + struct waiter threads[64]; + int order[64]; + uint32_t *batch, *b; + int done; + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = to_user_pointer(object); + execbuf.flags = ring; + execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC; + execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT; + if (gen < 6) + execbuf.flags |= I915_EXEC_SECURE; + + memset(object, 0, sizeof(object)); + object[0].handle = gem_create(fd, 4096); + gem_write(fd, object[0].handle, 0, &bbe, sizeof(bbe)); + execbuf.buffer_count = 1; + gem_execbuf(fd, &execbuf); + object[0].flags |= EXEC_OBJECT_WRITE; + + object[1].relocs_ptr = to_user_pointer(reloc); + object[1].relocation_count = 1024; + execbuf.buffer_count = 2; + + memset(reloc, 0, sizeof(reloc)); + b = batch = malloc(20*1024); + for (int i = 0; i < 1024; i++) { + uint64_t offset; + + reloc[i].presumed_offset = object[0].offset; + reloc[i].offset = (b - batch + 1) * sizeof(*batch); + reloc[i].delta = i * sizeof(uint32_t); + reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION; + reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION; + + offset = object[0].offset + reloc[i].delta; + *b++ 
= MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); + if (gen >= 8) { + *b++ = offset; + *b++ = offset >> 32; + } else if (gen >= 4) { + *b++ = 0; + *b++ = offset; + reloc[i].offset += sizeof(*batch); + } else { + b[-1] -= 1; + *b++ = offset; + } + *b++ = i; + } + *b++ = MI_BATCH_BUFFER_END; + igt_assert((b - batch)*sizeof(uint32_t) < 20*1024); + + done = 0; + for (int i = 0; i < ARRAY_SIZE(threads); i++) { + threads[i].fd = fd; + threads[i].object = object[1]; + threads[i].object.handle = gem_create(fd, 20*1024); + gem_write(fd, threads[i].object.handle, 0, batch, 20*1024); + + pthread_cond_init(&threads[i].cond, NULL); + pthread_mutex_init(&threads[i].mutex, NULL); + threads[i].done = &done; + threads[i].ready = 0; + + pthread_create(&threads[i].thread, NULL, waiter, &threads[i]); + order[i] = i; + } + free(batch); + + for (int i = 0; i < ARRAY_SIZE(threads); i++) { + for (int j = 0; j < ARRAY_SIZE(threads); j++) + threads[i].handles[j] = threads[j].object.handle; + } + + igt_until_timeout(timeout) { + for (int i = 0; i < ARRAY_SIZE(threads); i++) { + pthread_mutex_lock(&threads[i].mutex); + while (threads[i].ready) + pthread_cond_wait(&threads[i].cond, + &threads[i].mutex); + pthread_mutex_unlock(&threads[i].mutex); + igt_permute_array(threads[i].handles, + ARRAY_SIZE(threads[i].handles), + xchg); + } + + igt_permute_array(order, ARRAY_SIZE(threads), xchg); + for (int i = 0; i < ARRAY_SIZE(threads); i++) { + object[1] = threads[i].object; + gem_execbuf(fd, &execbuf); + threads[i].object = object[1]; + } + ++*cycles; + + for (int i = 0; i < ARRAY_SIZE(threads); i++) { + struct waiter *w = &threads[order[i]]; + + w->ready = 1; + pthread_cond_signal(&w->cond); + } + } + + for (int i = 0; i < ARRAY_SIZE(threads); i++) { + pthread_mutex_lock(&threads[i].mutex); + while (threads[i].ready) + pthread_cond_wait(&threads[i].cond, &threads[i].mutex); + pthread_mutex_unlock(&threads[i].mutex); + } + done = -1; + for (int i = 0; i < ARRAY_SIZE(threads); i++) { + threads[i].ready = 1; + pthread_cond_signal(&threads[i].cond); + pthread_join(threads[i].thread, NULL); + gem_close(fd, threads[i].object.handle); + } + + gem_close(fd, object[0].handle); +} + +static void +store_many(int fd, unsigned ring, int timeout) +{ + unsigned long *shared; + const char *names[16]; + int n = 0; + + shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); + igt_assert(shared != MAP_FAILED); + + intel_detect_and_clear_missed_interrupts(fd); + + if (ring == ALL_ENGINES) { + for_each_physical_engine(fd, ring) { + if (!gem_can_store_dword(fd, ring)) + continue; + + igt_fork(child, 1) + __store_many(fd, + ring, + timeout, + &shared[n]); + + names[n++] = e__->name; + } + igt_waitchildren(); + } else { + gem_require_ring(fd, ring); + igt_require(gem_can_store_dword(fd, ring)); + __store_many(fd, ring, timeout, &shared[n]); + names[n++] = NULL; + } + + for (int i = 0; i < n; i++) { + igt_info("%s%sompleted %ld cycles\n", + names[i] ?: "", names[i] ? 
" c" : "C", shared[i]); + } + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); + munmap(shared, 4096); +} + +static void +sync_all(int fd, int num_children, int timeout) +{ + unsigned engines[16], engine; + int num_engines = 0; + + for_each_physical_engine(fd, engine) { + engines[num_engines++] = engine; + if (num_engines == ARRAY_SIZE(engines)) + break; + } + igt_require(num_engines); + + intel_detect_and_clear_missed_interrupts(fd); + igt_fork(child, num_children) { + const uint32_t bbe = MI_BATCH_BUFFER_END; + struct drm_i915_gem_exec_object2 object; + struct drm_i915_gem_execbuffer2 execbuf; + double start, elapsed; + unsigned long cycles; + + memset(&object, 0, sizeof(object)); + object.handle = gem_create(fd, 4096); + gem_write(fd, object.handle, 0, &bbe, sizeof(bbe)); + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = to_user_pointer(&object); + execbuf.buffer_count = 1; + gem_execbuf(fd, &execbuf); + gem_sync(fd, object.handle); + + start = gettime(); + cycles = 0; + do { + do { + for (int n = 0; n < num_engines; n++) { + execbuf.flags = engines[n]; + gem_execbuf(fd, &execbuf); + } + gem_sync(fd, object.handle); + } while (++cycles & 1023); + } while ((elapsed = gettime() - start) < timeout); + igt_info("Completed %ld cycles: %.3f us\n", + cycles, elapsed*1e6/cycles); + + gem_close(fd, object.handle); + } + igt_waitchildren_timeout(timeout+10, NULL); + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); +} + +static void +store_all(int fd, int num_children, int timeout) +{ + const int gen = intel_gen(intel_get_drm_devid(fd)); + unsigned engines[16]; + int num_engines = 0; + unsigned int ring; + + for_each_physical_engine(fd, ring) { + if (!gem_can_store_dword(fd, ring)) + continue; + + engines[num_engines++] = ring; + if (num_engines == ARRAY_SIZE(engines)) + break; + } + igt_require(num_engines); + + intel_detect_and_clear_missed_interrupts(fd); + igt_fork(child, num_children) { + const uint32_t bbe = MI_BATCH_BUFFER_END; + struct drm_i915_gem_exec_object2 object[2]; + struct drm_i915_gem_relocation_entry reloc[1024]; + struct drm_i915_gem_execbuffer2 execbuf; + double start, elapsed; + unsigned long cycles; + uint32_t *batch, *b; + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = to_user_pointer(object); + execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC; + execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT; + if (gen < 6) + execbuf.flags |= I915_EXEC_SECURE; + + memset(object, 0, sizeof(object)); + object[0].handle = gem_create(fd, 4096); + gem_write(fd, object[0].handle, 0, &bbe, sizeof(bbe)); + execbuf.buffer_count = 1; + gem_execbuf(fd, &execbuf); + + object[0].flags |= EXEC_OBJECT_WRITE; + object[1].handle = gem_create(fd, 1024*16 + 4096); + + object[1].relocs_ptr = to_user_pointer(reloc); + object[1].relocation_count = 1024; + + batch = gem_mmap__cpu(fd, object[1].handle, 0, 16*1024 + 4096, + PROT_WRITE | PROT_READ); + gem_set_domain(fd, object[1].handle, + I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); + + memset(reloc, 0, sizeof(reloc)); + b = batch; + for (int i = 0; i < 1024; i++) { + uint64_t offset; + + reloc[i].presumed_offset = object[0].offset; + reloc[i].offset = (b - batch + 1) * sizeof(*batch); + reloc[i].delta = i * sizeof(uint32_t); + reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION; + reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION; + + offset = object[0].offset + reloc[i].delta; + *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 
1 << 22 : 0); + if (gen >= 8) { + *b++ = offset; + *b++ = offset >> 32; + } else if (gen >= 4) { + *b++ = 0; + *b++ = offset; + reloc[i].offset += sizeof(*batch); + } else { + b[-1] -= 1; + *b++ = offset; + } + *b++ = i; + } + *b++ = MI_BATCH_BUFFER_END; + igt_assert((b - batch)*sizeof(uint32_t) < 20*1024); + munmap(batch, 16*1024+4096); + execbuf.buffer_count = 2; + gem_execbuf(fd, &execbuf); + gem_sync(fd, object[1].handle); + + start = gettime(); + cycles = 0; + do { + do { + igt_permute_array(engines, num_engines, xchg); + for (int n = 0; n < num_engines; n++) { + execbuf.flags &= ~ENGINE_MASK; + execbuf.flags |= engines[n]; + gem_execbuf(fd, &execbuf); + } + gem_sync(fd, object[1].handle); + } while (++cycles & 1023); + } while ((elapsed = gettime() - start) < timeout); + igt_info("Completed %ld cycles: %.3f us\n", + cycles, elapsed*1e6/cycles); + + gem_close(fd, object[1].handle); + gem_close(fd, object[0].handle); + } + igt_waitchildren_timeout(timeout+10, NULL); + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); +} + +static void +preempt(int fd, unsigned ring, int num_children, int timeout) +{ + unsigned engines[16]; + const char *names[16]; + int num_engines = 0; + uint32_t ctx[2]; + + if (ring == ALL_ENGINES) { + for_each_physical_engine(fd, ring) { + names[num_engines] = e__->name; + engines[num_engines++] = ring; + if (num_engines == ARRAY_SIZE(engines)) + break; + } + + num_children *= num_engines; + } else { + gem_require_ring(fd, ring); + names[num_engines] = NULL; + engines[num_engines++] = ring; + } + + ctx[0] = gem_context_create(fd); + gem_context_set_priority(fd, ctx[0], MIN_PRIO); + + ctx[1] = gem_context_create(fd); + gem_context_set_priority(fd, ctx[1], MAX_PRIO); + + intel_detect_and_clear_missed_interrupts(fd); + igt_fork(child, num_children) { + const uint32_t bbe = MI_BATCH_BUFFER_END; + struct drm_i915_gem_exec_object2 object; + struct drm_i915_gem_execbuffer2 execbuf; + double start, elapsed; + unsigned long cycles; + + memset(&object, 0, sizeof(object)); + object.handle = gem_create(fd, 4096); + gem_write(fd, object.handle, 0, &bbe, sizeof(bbe)); + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = to_user_pointer(&object); + execbuf.buffer_count = 1; + execbuf.flags = engines[child % num_engines]; + execbuf.rsvd1 = ctx[1]; + gem_execbuf(fd, &execbuf); + gem_sync(fd, object.handle); + + start = gettime(); + cycles = 0; + do { + igt_spin_t *spin = + __igt_spin_batch_new(fd, + .ctx = ctx[0], + .engine = execbuf.flags); + + do { + gem_execbuf(fd, &execbuf); + gem_sync(fd, object.handle); + } while (++cycles & 1023); + + igt_spin_batch_free(fd, spin); + } while ((elapsed = gettime() - start) < timeout); + igt_info("%s%sompleted %ld cycles: %.3f us\n", + names[child % num_engines] ?: "", + names[child % num_engines] ? 
" c" : "C", + cycles, elapsed*1e6/cycles); + + gem_close(fd, object.handle); + } + igt_waitchildren_timeout(timeout+10, NULL); + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); + + gem_context_destroy(fd, ctx[1]); + gem_context_destroy(fd, ctx[0]); +} + +igt_main +{ + const struct intel_execution_engine *e; + const int ncpus = sysconf(_SC_NPROCESSORS_ONLN); + int fd = -1; + + igt_skip_on_simulation(); + + igt_fixture { + fd = drm_open_driver(DRIVER_INTEL); + igt_require_gem(fd); + gem_submission_print_method(fd); + gem_scheduler_print_capability(fd); + + igt_fork_hang_detector(fd); + } + + for (e = intel_execution_engines; e->name; e++) { + igt_subtest_f("%s", e->name) + sync_ring(fd, e->exec_id | e->flags, 1, 150); + igt_subtest_f("idle-%s", e->name) + idle_ring(fd, e->exec_id | e->flags, 150); + igt_subtest_f("active-%s", e->name) + active_ring(fd, e->exec_id | e->flags, 150); + igt_subtest_f("wakeup-%s", e->name) + wakeup_ring(fd, e->exec_id | e->flags, 150, 1); + igt_subtest_f("active-wakeup-%s", e->name) + active_wakeup_ring(fd, e->exec_id | e->flags, 150, 1); + igt_subtest_f("double-wakeup-%s", e->name) + wakeup_ring(fd, e->exec_id | e->flags, 150, 2); + igt_subtest_f("store-%s", e->name) + store_ring(fd, e->exec_id | e->flags, 1, 150); + igt_subtest_f("switch-%s", e->name) + switch_ring(fd, e->exec_id | e->flags, 1, 150); + igt_subtest_f("forked-switch-%s", e->name) + switch_ring(fd, e->exec_id | e->flags, ncpus, 150); + igt_subtest_f("many-%s", e->name) + store_many(fd, e->exec_id | e->flags, 150); + igt_subtest_f("forked-%s", e->name) + sync_ring(fd, e->exec_id | e->flags, ncpus, 150); + igt_subtest_f("forked-store-%s", e->name) + store_ring(fd, e->exec_id | e->flags, ncpus, 150); + } + + igt_subtest("basic-each") + sync_ring(fd, ALL_ENGINES, 1, 5); + igt_subtest("basic-store-each") + store_ring(fd, ALL_ENGINES, 1, 5); + igt_subtest("basic-many-each") + store_many(fd, ALL_ENGINES, 5); + igt_subtest("switch-each") + switch_ring(fd, ALL_ENGINES, 1, 150); + igt_subtest("forked-switch-each") + switch_ring(fd, ALL_ENGINES, ncpus, 150); + igt_subtest("forked-each") + sync_ring(fd, ALL_ENGINES, ncpus, 150); + igt_subtest("forked-store-each") + store_ring(fd, ALL_ENGINES, ncpus, 150); + igt_subtest("active-each") + active_ring(fd, ALL_ENGINES, 150); + igt_subtest("wakeup-each") + wakeup_ring(fd, ALL_ENGINES, 150, 1); + igt_subtest("active-wakeup-each") + active_wakeup_ring(fd, ALL_ENGINES, 150, 1); + igt_subtest("double-wakeup-each") + wakeup_ring(fd, ALL_ENGINES, 150, 2); + + igt_subtest("basic-all") + sync_all(fd, 1, 5); + igt_subtest("basic-store-all") + store_all(fd, 1, 5); + + igt_subtest("all") + sync_all(fd, 1, 150); + igt_subtest("store-all") + store_all(fd, 1, 150); + igt_subtest("forked-all") + sync_all(fd, ncpus, 150); + igt_subtest("forked-store-all") + store_all(fd, ncpus, 150); + + igt_subtest_group { + igt_fixture { + gem_require_contexts(fd); + igt_require(gem_scheduler_has_ctx_priority(fd)); + igt_require(gem_scheduler_has_preemption(fd)); + } + + igt_subtest("preempt-all") + preempt(fd, ALL_ENGINES, 1, 20); + + for (e = intel_execution_engines; e->name; e++) { + igt_subtest_f("preempt-%s", e->name) + preempt(fd, e->exec_id | e->flags, ncpus, 150); + } + } + + igt_fixture { + igt_stop_hang_detector(); + close(fd); + } +} |