/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <time.h>	/* clock_gettime() */
#include <pthread.h>	/* waiter threads used by store_many() */

#include "i915/gem.h"
#include "i915/gem_create.h"
#include "i915/gem_ring.h"
#include "igt_debugfs.h"
#include "igt_dummyload.h"
#include "igt_gt.h"
#include "igt.h"
#include "igt_sysfs.h"

#define MAX_PRIO I915_CONTEXT_MAX_USER_PRIORITY
#define MIN_PRIO I915_CONTEXT_MIN_USER_PRIORITY

#define ENGINE_MASK (I915_EXEC_RING_MASK | I915_EXEC_BSD_MASK)

IGT_TEST_DESCRIPTION("Basic check of ring<->ring write synchronisation.");

/*
 * Testcase: Basic check of sync
 *
 * Extremely efficient at catching missed irqs
 */
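
/*
 * Rough shape shared by the subtests below (see sync_ring() for the
 * canonical version): fork a child for each engine under test, keep
 * submitting a tiny batch and gem_sync() on it, and count how many
 * submit/wait cycles fit into the timeout:
 *
 *	do {
 *		do {
 *			gem_execbuf(fd, &execbuf);
 *			gem_sync(fd, object.handle);
 *		} while (++cycles & 1023);
 *	} while ((elapsed = gettime() - start) < timeout);
 *
 * Afterwards the parent checks intel_detect_and_clear_missed_interrupts(),
 * so any wait that did not complete via the expected interrupt fails the
 * test.
 */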
static double gettime(void)
{
	static clockid_t clock = -1;
	struct timespec ts;

	/* Stay on the same clock for consistency. */
	if (clock != (clockid_t)-1) {
		if (clock_gettime(clock, &ts))
			goto error;
		goto out;
	}

#ifdef CLOCK_MONOTONIC_RAW
	if (!clock_gettime(clock = CLOCK_MONOTONIC_RAW, &ts))
		goto out;
#endif
#ifdef CLOCK_MONOTONIC_COARSE
	if (!clock_gettime(clock = CLOCK_MONOTONIC_COARSE, &ts))
		goto out;
#endif
	if (!clock_gettime(clock = CLOCK_MONOTONIC, &ts))
		goto out;
error:
	igt_warn("Could not read monotonic time: %s\n", strerror(errno));
	igt_assert(0);
	return 0;

out:
	return ts.tv_sec + 1e-9*ts.tv_nsec;
}

static void
filter_engines_can_store_dword(int fd, struct intel_engine_data *ied)
{
	unsigned int count = 0;

	for (unsigned int n = 0; n < ied->nengines; n++) {
		if (!gem_class_can_store_dword(fd, ied->engines[n].class))
			continue;

		if (count != n)
			memcpy(&ied->engines[count], &ied->engines[n],
			       sizeof(ied->engines[0]));
		count++;
	}

	ied->nengines = count;
}

static struct intel_engine_data
list_engines(int fd, const intel_ctx_t *ctx, unsigned ring)
{
	struct intel_engine_data ied = { };

	if (ring == ALL_ENGINES) {
		ied = intel_engine_list_for_ctx_cfg(fd, &ctx->cfg);
	} else {
		if (ctx->cfg.num_engines)
			igt_assert(ring < ctx->cfg.num_engines);
		else
			igt_assert(gem_has_ring(fd, ring));

		ied.engines[ied.nengines].flags = ring;
		strcpy(ied.engines[ied.nengines].name, " ");
		ied.nengines++;
	}

	return ied;
}

static struct intel_engine_data
list_store_engines(int fd, const intel_ctx_t *ctx, unsigned ring)
{
	struct intel_engine_data ied = list_engines(fd, ctx, ring);

	filter_engines_can_store_dword(fd, &ied);

	return ied;
}

static const char *ied_name(const struct intel_engine_data *ied, int idx)
{
	return ied->engines[idx % ied->nengines].name;
}

static unsigned int ied_flags(const struct intel_engine_data *ied, int idx)
{
	return ied->engines[idx % ied->nengines].flags;
}

static void xchg_engine(void *array, unsigned i, unsigned j)
{
	struct intel_execution_engine2 *E = array;

	igt_swap(E[i], E[j]);
}

static void
sync_ring(int fd, const intel_ctx_t *ctx, unsigned ring,
	  int num_children, int timeout)
{
	struct intel_engine_data ied;

	ied = list_engines(fd, ctx, ring);
	igt_require(ied.nengines);
	num_children *= ied.nengines;

	intel_detect_and_clear_missed_interrupts(fd);
	igt_fork(child, num_children) {
		const uint32_t bbe = MI_BATCH_BUFFER_END;
		struct drm_i915_gem_exec_object2 object;
		struct drm_i915_gem_execbuffer2 execbuf;
		double start, elapsed;
		unsigned long cycles;

		memset(&object, 0, sizeof(object));
		object.handle = gem_create(fd, 4096);
		gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));

		memset(&execbuf, 0, sizeof(execbuf));
		execbuf.buffers_ptr = to_user_pointer(&object);
		execbuf.buffer_count = 1;
		execbuf.flags = ied_flags(&ied, child);
		execbuf.rsvd1 = ctx->id;

		gem_execbuf(fd, &execbuf);
		gem_sync(fd, object.handle);

		start = gettime();
		cycles = 0;
		do {
			do {
				gem_execbuf(fd, &execbuf);
				gem_sync(fd, object.handle);
			} while (++cycles & 1023);
		} while ((elapsed = gettime() - start) < timeout);
		igt_info("%s %ld cycles: %.3f us\n",
			 ied_name(&ied, child), cycles,
			 elapsed * 1e6 / cycles);

		gem_close(fd, object.handle);
	}
	igt_waitchildren_timeout(timeout+10, NULL);
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
}

static void
idle_ring(int fd, const intel_ctx_t *ctx, unsigned int ring,
	  int num_children, int timeout)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_exec_object2 object;
	struct drm_i915_gem_execbuffer2 execbuf;
	double start, elapsed;
	unsigned long cycles;

	gem_require_ring(fd, ring);

	memset(&object, 0, sizeof(object));
	object.handle = gem_create(fd, 4096);
	gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));
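
	/*
	 * Note: unlike sync_ring() above, each iteration of the measurement
	 * loop below waits for the whole GPU to become idle
	 * (gem_quiescent_gpu) rather than just for this batch, so the
	 * reported cycle time also includes the cost of idling every engine.
	 */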
memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&object); execbuf.buffer_count = 1; execbuf.flags = ring; execbuf.rsvd1 = ctx->id; gem_execbuf(fd, &execbuf); gem_sync(fd, object.handle); intel_detect_and_clear_missed_interrupts(fd); start = gettime(); cycles = 0; do { do { gem_execbuf(fd, &execbuf); gem_quiescent_gpu(fd); } while (++cycles & 1023); } while ((elapsed = gettime() - start) < timeout); igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); igt_info("Completed %ld cycles: %.3f us\n", cycles, elapsed * 1e6 / cycles); gem_close(fd, object.handle); } static void wakeup_ring(int fd, const intel_ctx_t *ctx, unsigned ring, int timeout, int wlen) { struct intel_engine_data ied; uint64_t ahnd = get_reloc_ahnd(fd, ctx->id); ied = list_store_engines(fd, ctx, ring); igt_require(ied.nengines); intel_detect_and_clear_missed_interrupts(fd); igt_fork(child, ied.nengines) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 object; struct drm_i915_gem_execbuffer2 execbuf; double end, this, elapsed, now, baseline; unsigned long cycles; igt_spin_t *spin; ahnd = get_reloc_ahnd(fd, ctx->id); memset(&object, 0, sizeof(object)); object.handle = gem_create(fd, 4096); object.offset = get_offset(ahnd, object.handle, 4096, 0); if (ahnd) object.flags = EXEC_OBJECT_PINNED; gem_write(fd, object.handle, 0, &bbe, sizeof(bbe)); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&object); execbuf.buffer_count = 1; execbuf.flags = ied_flags(&ied, child); execbuf.rsvd1 = ctx->id; spin = __igt_spin_new(fd, .ahnd = ahnd, .ctx = ctx, .engine = execbuf.flags, .flags = (IGT_SPIN_POLL_RUN | IGT_SPIN_FAST)); igt_assert(igt_spin_has_poll(spin)); gem_execbuf(fd, &execbuf); igt_spin_end(spin); gem_sync(fd, object.handle); for (int warmup = 0; warmup <= 1; warmup++) { end = gettime() + timeout/10.; elapsed = 0; cycles = 0; do { igt_spin_reset(spin); gem_execbuf(fd, &spin->execbuf); igt_spin_busywait_until_started(spin); this = gettime(); igt_spin_end(spin); gem_sync(fd, spin->handle); now = gettime(); elapsed += now - this; cycles++; } while (now < end); baseline = elapsed / cycles; } igt_info("%s baseline %ld cycles: %.3f us\n", ied_name(&ied, child), cycles, elapsed * 1e6 / cycles); end = gettime() + timeout; elapsed = 0; cycles = 0; do { igt_spin_reset(spin); gem_execbuf(fd, &spin->execbuf); igt_spin_busywait_until_started(spin); for (int n = 0; n < wlen; n++) gem_execbuf(fd, &execbuf); this = gettime(); igt_spin_end(spin); gem_sync(fd, object.handle); now = gettime(); elapsed += now - this; cycles++; } while (now < end); elapsed -= cycles * baseline; igt_info("%s completed %ld cycles: %.3f + %.3f us\n", ied_name(&ied, child), cycles, 1e6 * baseline, elapsed * 1e6 / cycles); igt_spin_free(fd, spin); gem_close(fd, object.handle); put_offset(ahnd, object.handle); put_ahnd(ahnd); } igt_waitchildren_timeout(2*timeout, NULL); put_ahnd(ahnd); igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); } static void active_ring(int fd, const intel_ctx_t *ctx, unsigned int ring, int num_children, int timeout) { struct intel_engine_data ied; uint64_t ahnd = get_reloc_ahnd(fd, ctx->id); ied = list_store_engines(fd, ctx, ring); igt_require(ied.nengines); intel_detect_and_clear_missed_interrupts(fd); igt_fork(child, ied.nengines) { double start, end, elapsed; unsigned long cycles; igt_spin_t *spin[2]; ahnd = get_reloc_ahnd(fd, ctx->id); spin[0] = __igt_spin_new(fd, .ahnd = ahnd, .ctx = ctx, .engine = ied_flags(&ied, child), .flags = 
					 IGT_SPIN_FAST);
		spin[1] = __igt_spin_new(fd,
					 .ahnd = ahnd,
					 .ctx = ctx,
					 .engine = ied_flags(&ied, child),
					 .flags = IGT_SPIN_FAST);

		start = gettime();
		end = start + timeout;
		cycles = 0;
		do {
			for (int loop = 0; loop < 1024; loop++) {
				igt_spin_t *s = spin[loop & 1];

				igt_spin_end(s);
				gem_sync(fd, s->handle);

				igt_spin_reset(s);

				gem_execbuf(fd, &s->execbuf);
			}
			cycles += 1024;
		} while ((elapsed = gettime()) < end);
		igt_spin_free(fd, spin[1]);
		igt_spin_free(fd, spin[0]);
		put_ahnd(ahnd);

		igt_info("%s %ld cycles: %.3f us\n",
			 ied_name(&ied, child), cycles,
			 (elapsed - start) * 1e6 / cycles);
	}
	igt_waitchildren_timeout(2*timeout, NULL);
	put_ahnd(ahnd);
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
}

static void
active_wakeup_ring(int fd, const intel_ctx_t *ctx, unsigned ring,
		   int timeout, int wlen)
{
	struct intel_engine_data ied;
	uint64_t ahnd0 = get_reloc_ahnd(fd, 0);
	uint64_t ahnd = get_reloc_ahnd(fd, ctx->id);

	ied = list_store_engines(fd, ctx, ring);
	igt_require(ied.nengines);

	intel_detect_and_clear_missed_interrupts(fd);
	igt_fork(child, ied.nengines) {
		const uint32_t bbe = MI_BATCH_BUFFER_END;
		struct drm_i915_gem_exec_object2 object;
		struct drm_i915_gem_execbuffer2 execbuf;
		double end, this, elapsed, now, baseline;
		unsigned long cycles;
		igt_spin_t *spin[2];

		ahnd0 = get_reloc_ahnd(fd, 0);
		ahnd = get_reloc_ahnd(fd, ctx->id);

		memset(&object, 0, sizeof(object));
		object.handle = gem_create(fd, 4096);
		object.offset = get_offset(ahnd, object.handle, 4096, 0);
		if (ahnd)
			object.flags = EXEC_OBJECT_PINNED;
		gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));

		memset(&execbuf, 0, sizeof(execbuf));
		execbuf.buffers_ptr = to_user_pointer(&object);
		execbuf.buffer_count = 1;
		execbuf.flags = ied_flags(&ied, child);
		execbuf.rsvd1 = ctx->id;

		spin[0] = __igt_spin_new(fd,
					 .ahnd = ahnd0,
					 .engine = execbuf.flags,
					 .flags = (IGT_SPIN_POLL_RUN |
						   IGT_SPIN_FAST));
		igt_assert(igt_spin_has_poll(spin[0]));

		spin[1] = __igt_spin_new(fd,
					 .ahnd = ahnd0,
					 .engine = execbuf.flags,
					 .flags = (IGT_SPIN_POLL_RUN |
						   IGT_SPIN_FAST));

		gem_execbuf(fd, &execbuf);

		igt_spin_end(spin[1]);
		igt_spin_end(spin[0]);
		gem_sync(fd, object.handle);

		for (int warmup = 0; warmup <= 1; warmup++) {
			igt_spin_reset(spin[0]);

			gem_execbuf(fd, &spin[0]->execbuf);

			end = gettime() + timeout/10.;
			elapsed = 0;
			cycles = 0;
			do {
				igt_spin_busywait_until_started(spin[0]);

				igt_spin_reset(spin[1]);

				gem_execbuf(fd, &spin[1]->execbuf);

				this = gettime();
				igt_spin_end(spin[0]);
				gem_sync(fd, spin[0]->handle);
				now = gettime();

				elapsed += now - this;
				cycles++;

				igt_swap(spin[0], spin[1]);
			} while (now < end);
			igt_spin_end(spin[0]);

			baseline = elapsed / cycles;
		}
		igt_info("%s baseline %ld cycles: %.3f us\n",
			 ied_name(&ied, child), cycles,
			 elapsed * 1e6 / cycles);

		igt_spin_reset(spin[0]);

		gem_execbuf(fd, &spin[0]->execbuf);

		end = gettime() + timeout;
		elapsed = 0;
		cycles = 0;
		do {
			igt_spin_busywait_until_started(spin[0]);

			for (int n = 0; n < wlen; n++)
				gem_execbuf(fd, &execbuf);

			igt_spin_reset(spin[1]);

			gem_execbuf(fd, &spin[1]->execbuf);

			this = gettime();
			igt_spin_end(spin[0]);
			gem_sync(fd, object.handle);
			now = gettime();

			elapsed += now - this;
			cycles++;

			igt_swap(spin[0], spin[1]);
		} while (now < end);
		igt_spin_end(spin[0]);

		elapsed -= cycles * baseline;
		igt_info("%s completed %ld cycles: %.3f + %.3f us\n",
			 ied_name(&ied, child), cycles,
			 1e6 * baseline, elapsed * 1e6 / cycles);

		igt_spin_free(fd, spin[1]);
		igt_spin_free(fd, spin[0]);
		gem_close(fd, object.handle);
		put_offset(ahnd, object.handle);
		put_ahnd(ahnd);
		put_ahnd(ahnd0);
	}
	igt_waitchildren_timeout(2*timeout, NULL);
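
	/*
	 * All children have exited; release the parent's allocator handles
	 * and verify that no interrupt was missed while waiting.
	 */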
put_ahnd(ahnd); put_ahnd(ahnd0); igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); } static void store_ring(int fd, const intel_ctx_t *ctx, unsigned ring, int num_children, int timeout) { const unsigned int gen = intel_gen(intel_get_drm_devid(fd)); struct intel_engine_data ied; bool has_relocs = gem_has_relocations(fd); ied = list_store_engines(fd, ctx, ring); igt_require(ied.nengines); num_children *= ied.nengines; intel_detect_and_clear_missed_interrupts(fd); igt_fork(child, num_children) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 object[2]; struct drm_i915_gem_relocation_entry reloc[1024]; struct drm_i915_gem_execbuffer2 execbuf; double start, elapsed; unsigned long cycles; uint32_t *batch, *b; memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(object); execbuf.flags = ied_flags(&ied, child); execbuf.flags |= I915_EXEC_NO_RELOC; execbuf.flags |= I915_EXEC_HANDLE_LUT; if (gen < 6) execbuf.flags |= I915_EXEC_SECURE; execbuf.rsvd1 = ctx->id; memset(object, 0, sizeof(object)); object[0].handle = gem_create(fd, 4096); gem_write(fd, object[0].handle, 0, &bbe, sizeof(bbe)); execbuf.buffer_count = 1; gem_execbuf(fd, &execbuf); object[0].flags |= EXEC_OBJECT_WRITE; object[0].flags |= has_relocs ? 0 : EXEC_OBJECT_PINNED; object[1].handle = gem_create(fd, 20*1024); object[1].relocs_ptr = to_user_pointer(reloc); object[1].relocation_count = has_relocs ? 1024 : 0; batch = gem_mmap__cpu(fd, object[1].handle, 0, 20*1024, PROT_WRITE | PROT_READ); gem_set_domain(fd, object[1].handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); memset(reloc, 0, sizeof(reloc)); b = batch; for (int i = 0; i < 1024; i++) { uint64_t offset; reloc[i].presumed_offset = object[0].offset; reloc[i].offset = (b - batch + 1) * sizeof(*batch); reloc[i].delta = i * sizeof(uint32_t); reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION; reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION; offset = object[0].offset + reloc[i].delta; *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 
1 << 22 : 0); if (gen >= 8) { *b++ = offset; *b++ = offset >> 32; } else if (gen >= 4) { *b++ = 0; *b++ = offset; reloc[i].offset += sizeof(*batch); } else { b[-1] -= 1; *b++ = offset; } *b++ = i; } *b++ = MI_BATCH_BUFFER_END; igt_assert((b - batch)*sizeof(uint32_t) < 20*1024); munmap(batch, 20*1024); execbuf.buffer_count = 2; gem_execbuf(fd, &execbuf); gem_sync(fd, object[1].handle); start = gettime(); cycles = 0; do { do { gem_execbuf(fd, &execbuf); gem_sync(fd, object[1].handle); } while (++cycles & 1023); } while ((elapsed = gettime() - start) < timeout); igt_info("%s completed %ld cycles: %.3f us\n", ied_name(&ied, child), cycles, elapsed *1e6 / cycles); gem_close(fd, object[1].handle); gem_close(fd, object[0].handle); } igt_waitchildren_timeout(timeout+10, NULL); igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); } static void switch_ring(int fd, const intel_ctx_t *ctx, unsigned ring, int num_children, int timeout) { const unsigned int gen = intel_gen(intel_get_drm_devid(fd)); struct intel_engine_data ied; bool has_relocs = gem_has_relocations(fd); gem_require_contexts(fd); ied = list_store_engines(fd, ctx, ring); igt_require(ied.nengines); num_children *= ied.nengines; intel_detect_and_clear_missed_interrupts(fd); igt_fork(child, num_children) { struct context { struct drm_i915_gem_exec_object2 object[2]; struct drm_i915_gem_relocation_entry reloc[1024]; struct drm_i915_gem_execbuffer2 execbuf; const intel_ctx_t *ctx; } contexts[2]; double elapsed, baseline; unsigned long cycles; for (int i = 0; i < ARRAY_SIZE(contexts); i++) { const uint32_t bbe = MI_BATCH_BUFFER_END; const uint32_t sz = 32 << 10; struct context *c = &contexts[i]; uint32_t *batch, *b; memset(&c->execbuf, 0, sizeof(c->execbuf)); c->execbuf.buffers_ptr = to_user_pointer(c->object); c->execbuf.flags = ied_flags(&ied, child); c->execbuf.flags |= I915_EXEC_NO_RELOC; c->execbuf.flags |= I915_EXEC_HANDLE_LUT; if (gen < 6) c->execbuf.flags |= I915_EXEC_SECURE; c->ctx = intel_ctx_create(fd, &ctx->cfg); c->execbuf.rsvd1 = c->ctx->id; memset(c->object, 0, sizeof(c->object)); c->object[0].handle = gem_create(fd, 4096); gem_write(fd, c->object[0].handle, 0, &bbe, sizeof(bbe)); c->execbuf.buffer_count = 1; gem_execbuf(fd, &c->execbuf); c->object[0].flags |= EXEC_OBJECT_WRITE; c->object[1].handle = gem_create(fd, sz); c->object[1].relocs_ptr = to_user_pointer(c->reloc); c->object[1].relocation_count = has_relocs ? 1024 * i : 0; batch = gem_mmap__cpu(fd, c->object[1].handle, 0, sz, PROT_WRITE | PROT_READ); gem_set_domain(fd, c->object[1].handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); memset(c->reloc, 0, sizeof(c->reloc)); b = batch; for (int r = 0; r < c->object[1].relocation_count; r++) { uint64_t offset; c->reloc[r].presumed_offset = c->object[0].offset; c->reloc[r].offset = (b - batch + 1) * sizeof(*batch); c->reloc[r].delta = r * sizeof(uint32_t); c->reloc[r].read_domains = I915_GEM_DOMAIN_INSTRUCTION; c->reloc[r].write_domain = I915_GEM_DOMAIN_INSTRUCTION; offset = c->object[0].offset + c->reloc[r].delta; *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 
1 << 22 : 0); if (gen >= 8) { *b++ = offset; *b++ = offset >> 32; } else if (gen >= 4) { *b++ = 0; *b++ = offset; c->reloc[r].offset += sizeof(*batch); } else { b[-1] -= 1; *b++ = offset; } *b++ = r; *b++ = 0x5 << 23; } *b++ = MI_BATCH_BUFFER_END; igt_assert((b - batch)*sizeof(uint32_t) < sz); munmap(batch, sz); c->execbuf.buffer_count = 2; gem_execbuf(fd, &c->execbuf); gem_sync(fd, c->object[1].handle); } cycles = 0; baseline = 0; igt_until_timeout(timeout) { do { double this; gem_execbuf(fd, &contexts[1].execbuf); gem_execbuf(fd, &contexts[0].execbuf); this = gettime(); gem_sync(fd, contexts[1].object[1].handle); gem_sync(fd, contexts[0].object[1].handle); baseline += gettime() - this; } while (++cycles & 1023); } baseline /= cycles; cycles = 0; elapsed = 0; igt_until_timeout(timeout) { do { double this; gem_execbuf(fd, &contexts[1].execbuf); gem_execbuf(fd, &contexts[0].execbuf); this = gettime(); gem_sync(fd, contexts[0].object[1].handle); elapsed += gettime() - this; gem_sync(fd, contexts[1].object[1].handle); } while (++cycles & 1023); } elapsed /= cycles; igt_info("%s completed %ld cycles: %.3f us, baseline %.3f us\n", ied_name(&ied, child), cycles, elapsed * 1e6, baseline * 1e6); for (int i = 0; i < ARRAY_SIZE(contexts); i++) { gem_close(fd, contexts[i].object[1].handle); gem_close(fd, contexts[i].object[0].handle); intel_ctx_destroy(fd, contexts[i].ctx); } } igt_waitchildren_timeout(timeout+10, NULL); igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); } static void xchg(void *array, unsigned i, unsigned j) { uint32_t *u32 = array; uint32_t tmp = u32[i]; u32[i] = u32[j]; u32[j] = tmp; } struct waiter { pthread_t thread; pthread_mutex_t mutex; pthread_cond_t cond; int ready; volatile int *done; int fd; struct drm_i915_gem_exec_object2 object; uint32_t handles[64]; }; static void *waiter(void *arg) { struct waiter *w = arg; do { pthread_mutex_lock(&w->mutex); w->ready = 0; pthread_cond_signal(&w->cond); while (!w->ready) pthread_cond_wait(&w->cond, &w->mutex); pthread_mutex_unlock(&w->mutex); if (*w->done < 0) return NULL; gem_sync(w->fd, w->object.handle); for (int n = 0; n < ARRAY_SIZE(w->handles); n++) gem_sync(w->fd, w->handles[n]); } while (1); } static void __store_many(int fd, const intel_ctx_t *ctx, unsigned ring, int timeout, unsigned long *cycles) { const unsigned int gen = intel_gen(intel_get_drm_devid(fd)); const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 object[2]; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_relocation_entry reloc[1024]; struct waiter threads[64]; int order[64]; uint32_t *batch, *b; int done; bool has_relocs = gem_has_relocations(fd); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(object); execbuf.flags = ring; execbuf.flags |= I915_EXEC_NO_RELOC; execbuf.flags |= I915_EXEC_HANDLE_LUT; if (gen < 6) execbuf.flags |= I915_EXEC_SECURE; execbuf.rsvd1 = ctx->id; memset(object, 0, sizeof(object)); object[0].handle = gem_create(fd, 4096); gem_write(fd, object[0].handle, 0, &bbe, sizeof(bbe)); execbuf.buffer_count = 1; gem_execbuf(fd, &execbuf); object[0].flags |= EXEC_OBJECT_WRITE; object[0].flags |= has_relocs ? 0 : EXEC_OBJECT_PINNED; object[1].relocs_ptr = to_user_pointer(reloc); object[1].relocation_count = has_relocs ? 
1024 : 0; execbuf.buffer_count = 2; memset(reloc, 0, sizeof(reloc)); b = batch = malloc(20*1024); for (int i = 0; i < 1024; i++) { uint64_t offset; reloc[i].presumed_offset = object[0].offset; reloc[i].offset = (b - batch + 1) * sizeof(*batch); reloc[i].delta = i * sizeof(uint32_t); reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION; reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION; offset = object[0].offset + reloc[i].delta; *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); if (gen >= 8) { *b++ = offset; *b++ = offset >> 32; } else if (gen >= 4) { *b++ = 0; *b++ = offset; reloc[i].offset += sizeof(*batch); } else { b[-1] -= 1; *b++ = offset; } *b++ = i; } *b++ = MI_BATCH_BUFFER_END; igt_assert((b - batch)*sizeof(uint32_t) < 20*1024); done = 0; for (int i = 0; i < ARRAY_SIZE(threads); i++) { threads[i].fd = fd; threads[i].object = object[1]; threads[i].object.handle = gem_create(fd, 20*1024); gem_write(fd, threads[i].object.handle, 0, batch, 20*1024); pthread_cond_init(&threads[i].cond, NULL); pthread_mutex_init(&threads[i].mutex, NULL); threads[i].done = &done; threads[i].ready = 0; pthread_create(&threads[i].thread, NULL, waiter, &threads[i]); order[i] = i; } free(batch); for (int i = 0; i < ARRAY_SIZE(threads); i++) { for (int j = 0; j < ARRAY_SIZE(threads); j++) threads[i].handles[j] = threads[j].object.handle; } igt_until_timeout(timeout) { for (int i = 0; i < ARRAY_SIZE(threads); i++) { pthread_mutex_lock(&threads[i].mutex); while (threads[i].ready) pthread_cond_wait(&threads[i].cond, &threads[i].mutex); pthread_mutex_unlock(&threads[i].mutex); igt_permute_array(threads[i].handles, ARRAY_SIZE(threads[i].handles), xchg); } igt_permute_array(order, ARRAY_SIZE(threads), xchg); for (int i = 0; i < ARRAY_SIZE(threads); i++) { object[1] = threads[i].object; gem_execbuf(fd, &execbuf); threads[i].object = object[1]; } ++*cycles; for (int i = 0; i < ARRAY_SIZE(threads); i++) { struct waiter *w = &threads[order[i]]; w->ready = 1; pthread_cond_signal(&w->cond); } } for (int i = 0; i < ARRAY_SIZE(threads); i++) { pthread_mutex_lock(&threads[i].mutex); while (threads[i].ready) pthread_cond_wait(&threads[i].cond, &threads[i].mutex); pthread_mutex_unlock(&threads[i].mutex); } done = -1; for (int i = 0; i < ARRAY_SIZE(threads); i++) { threads[i].ready = 1; pthread_cond_signal(&threads[i].cond); pthread_join(threads[i].thread, NULL); gem_close(fd, threads[i].object.handle); } gem_close(fd, object[0].handle); } static void store_many(int fd, const intel_ctx_t *ctx, unsigned int ring, int num_children, int timeout) { struct intel_engine_data ied; unsigned long *shared; shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); igt_assert(shared != MAP_FAILED); ied = list_store_engines(fd, ctx, ring); igt_require(ied.nengines); intel_detect_and_clear_missed_interrupts(fd); for (int n = 0; n < ied.nengines; n++) { igt_fork(child, 1) __store_many(fd, ctx, ied_flags(&ied, n), timeout, &shared[n]); } igt_waitchildren(); for (int n = 0; n < ied.nengines; n++) { igt_info("%s completed %ld cycles\n", ied_name(&ied, n), shared[n]); } igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); munmap(shared, 4096); } static void sync_all(int fd, const intel_ctx_t *ctx, int num_children, int timeout) { struct intel_engine_data ied; ied = list_engines(fd, ctx, ALL_ENGINES); igt_require(ied.nengines); intel_detect_and_clear_missed_interrupts(fd); igt_fork(child, num_children) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 object; struct drm_i915_gem_execbuffer2 
execbuf; double start, elapsed; unsigned long cycles; memset(&object, 0, sizeof(object)); object.handle = gem_create(fd, 4096); gem_write(fd, object.handle, 0, &bbe, sizeof(bbe)); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&object); execbuf.buffer_count = 1; execbuf.rsvd1 = ctx->id; gem_execbuf(fd, &execbuf); gem_sync(fd, object.handle); start = gettime(); cycles = 0; do { do { for (int n = 0; n < ied.nengines; n++) { execbuf.flags = ied_flags(&ied, n); gem_execbuf(fd, &execbuf); } gem_sync(fd, object.handle); } while (++cycles & 1023); } while ((elapsed = gettime() - start) < timeout); igt_info("Completed %ld cycles: %.3f us\n", cycles, elapsed * 1e6 / cycles); gem_close(fd, object.handle); } igt_waitchildren_timeout(timeout+10, NULL); igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); } static void store_all(int fd, const intel_ctx_t *ctx, int num_children, int timeout) { const unsigned int gen = intel_gen(intel_get_drm_devid(fd)); struct intel_engine_data ied; bool has_relocs = gem_has_relocations(fd); ied = list_store_engines(fd, ctx, ALL_ENGINES); igt_require(ied.nengines); intel_detect_and_clear_missed_interrupts(fd); igt_fork(child, num_children) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 object[2]; struct drm_i915_gem_relocation_entry reloc[1024]; struct drm_i915_gem_execbuffer2 execbuf; double start, elapsed; unsigned long cycles; uint32_t *batch, *b; memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(object); execbuf.flags |= I915_EXEC_NO_RELOC; execbuf.flags |= I915_EXEC_HANDLE_LUT; if (gen < 6) execbuf.flags |= I915_EXEC_SECURE; execbuf.rsvd1 = ctx->id; memset(object, 0, sizeof(object)); object[0].handle = gem_create(fd, 4096); gem_write(fd, object[0].handle, 0, &bbe, sizeof(bbe)); execbuf.buffer_count = 1; gem_execbuf(fd, &execbuf); object[0].flags |= EXEC_OBJECT_WRITE; object[0].flags |= has_relocs ? 0 : EXEC_OBJECT_PINNED; object[1].handle = gem_create(fd, 1024*16 + 4096); object[1].relocs_ptr = to_user_pointer(reloc); object[1].relocation_count = has_relocs ? 1024 : 0; batch = gem_mmap__cpu(fd, object[1].handle, 0, 16*1024 + 4096, PROT_WRITE | PROT_READ); gem_set_domain(fd, object[1].handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); memset(reloc, 0, sizeof(reloc)); b = batch; for (int i = 0; i < 1024; i++) { uint64_t offset; reloc[i].presumed_offset = object[0].offset; reloc[i].offset = (b - batch + 1) * sizeof(*batch); reloc[i].delta = i * sizeof(uint32_t); reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION; reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION; offset = object[0].offset + reloc[i].delta; *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 
1 << 22 : 0); if (gen >= 8) { *b++ = offset; *b++ = offset >> 32; } else if (gen >= 4) { *b++ = 0; *b++ = offset; reloc[i].offset += sizeof(*batch); } else { b[-1] -= 1; *b++ = offset; } *b++ = i; } *b++ = MI_BATCH_BUFFER_END; igt_assert((b - batch)*sizeof(uint32_t) < 20*1024); munmap(batch, 16*1024+4096); execbuf.buffer_count = 2; gem_execbuf(fd, &execbuf); gem_sync(fd, object[1].handle); start = gettime(); cycles = 0; do { do { igt_permute_array(ied.engines, ied.nengines, xchg_engine); for (int n = 0; n < ied.nengines; n++) { execbuf.flags &= ~ENGINE_MASK; execbuf.flags |= ied_flags(&ied, n); gem_execbuf(fd, &execbuf); } gem_sync(fd, object[1].handle); } while (++cycles & 1023); } while ((elapsed = gettime() - start) < timeout); igt_info("Completed %ld cycles: %.3f us\n", cycles, elapsed * 1e6 / cycles); gem_close(fd, object[1].handle); gem_close(fd, object[0].handle); } igt_waitchildren_timeout(timeout+10, NULL); igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); } static void preempt(int fd, const intel_ctx_t *ctx, unsigned ring, int num_children, int timeout) { struct intel_engine_data ied; const intel_ctx_t *tmp_ctx[2]; uint64_t ahnd = get_reloc_ahnd(fd, 0); /* just offset provider */ ied = list_engines(fd, ctx, ALL_ENGINES); igt_require(ied.nengines); num_children *= ied.nengines; tmp_ctx[0] = intel_ctx_create(fd, &ctx->cfg); gem_context_set_priority(fd, tmp_ctx[0]->id, MIN_PRIO); tmp_ctx[1] = intel_ctx_create(fd, &ctx->cfg); gem_context_set_priority(fd, tmp_ctx[1]->id, MAX_PRIO); intel_detect_and_clear_missed_interrupts(fd); igt_fork(child, num_children) { const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 object; struct drm_i915_gem_execbuffer2 execbuf; double start, elapsed; unsigned long cycles; ahnd = get_reloc_ahnd(fd, 0); memset(&object, 0, sizeof(object)); object.handle = gem_create(fd, 4096); object.offset = get_offset(ahnd, object.handle, 4096, 0); if (ahnd) object.flags = EXEC_OBJECT_PINNED; gem_write(fd, object.handle, 0, &bbe, sizeof(bbe)); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&object); execbuf.buffer_count = 1; execbuf.flags = ied_flags(&ied, child); execbuf.rsvd1 = tmp_ctx[1]->id; gem_execbuf(fd, &execbuf); gem_sync(fd, object.handle); start = gettime(); cycles = 0; do { igt_spin_t *spin = __igt_spin_new(fd, .ahnd = ahnd, .ctx = tmp_ctx[0], .engine = execbuf.flags); do { gem_execbuf(fd, &execbuf); gem_sync(fd, object.handle); } while (++cycles & 1023); igt_spin_free(fd, spin); } while ((elapsed = gettime() - start) < timeout); igt_info("%s %ld cycles: %.3f us\n", ied_name(&ied, child), cycles, elapsed * 1e6/cycles); gem_close(fd, object.handle); put_offset(ahnd, object.handle); put_ahnd(ahnd); } igt_waitchildren_timeout(timeout+10, NULL); put_ahnd(ahnd); igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); intel_ctx_destroy(fd, tmp_ctx[1]); intel_ctx_destroy(fd, tmp_ctx[0]); } igt_main { const int ncpus = sysconf(_SC_NPROCESSORS_ONLN); const struct { const char *name; void (*func)(int fd, const intel_ctx_t *ctx, unsigned int engine, int num_children, int timeout); int num_children; int timeout; const char *describe; } all[] = { { "basic-each", sync_ring, 1, 2, "Check synchronisation of ring" }, { "basic-store-each", store_ring, 1, 2, "Check that store synchronisation works" }, { "basic-many-each", store_many, 0, 2, "Create race condition and see if we can" " catch interrupts" }, { "switch-each", switch_ring, 1, 20, "Check sync after context switch" }, { "forked-switch-each", 
		  switch_ring, ncpus, 20,
		  "Check sync after context switch in parallel" },
		{ "forked-each", sync_ring, ncpus, 20,
		  "Forked variant of sync_ring, which checks synchronisation"
		  " of a ring with parallel executions" },
		{ "forked-store-each", store_ring, ncpus, 20,
		  "Forked variant of store_ring, checking that parallel store"
		  " synchronisation works" },
		{ "active-each", active_ring, 0, 20,
		  "Exercise waiting while keeping the GPU busy" },
		{ "wakeup-each", wakeup_ring, 20, 1,
		  "Stress test for nop + sync" },
		{ "active-wakeup-each", active_wakeup_ring, 20, 1,
		  "Measure wakeup latency while also scheduling the next batch" },
		{ "double-wakeup-each", wakeup_ring, 20, 2,
		  "Double stress test for nop + sync" },
		{}
	}, individual[] = {
		{ "default", sync_ring, 1, 20,
		  "Check synchronisation of rings" },
		{ "idle", idle_ring, 0, 20,
		  "Exercise and measure idle requests" },
		{ "active", active_ring, 0, 20,
		  "Exercise waiting while keeping the GPU busy" },
		{ "wakeup", wakeup_ring, 20, 1,
		  "Stress test for nop + sync" },
		{ "active-wakeup", active_wakeup_ring, 20, 1,
		  "Measure wakeup latency while also scheduling the next batch" },
		{ "double-wakeup", wakeup_ring, 20, 2,
		  "Double stress test for nop + sync" },
		{ "store", store_ring, 1, 20,
		  "Check that store synchronisation works" },
		{ "switch", switch_ring, 1, 20,
		  "Check sync after context switch" },
		{ "forked-switch", switch_ring, ncpus, 20,
		  "Check sync after context switch in parallel" },
		{ "many", store_many, 0, 20,
		  "Create race condition and see if we can catch interrupts" },
		{ "forked", sync_ring, ncpus, 20,
		  "Check synchronisation of ring with parallel executions" },
		{ "forked-store", store_ring, ncpus, 20,
		  "Check that store synchronisation works with multiple parallel"
		  " executions" },
		{}
	};
#define for_each_test(t, T) for(typeof(*T) *t = T; t->name; t++)
	const struct intel_execution_engine2 *e;
	const intel_ctx_t *ctx;
	int fd = -1;

	igt_fixture {
		fd = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(fd);
		gem_submission_print_method(fd);
		gem_scheduler_print_capability(fd);

		ctx = intel_ctx_create_all_physical(fd);

		igt_fork_hang_detector(fd);
		intel_allocator_multiprocess_start();
	}

	/* Legacy way of selecting rings. */
	for_each_test(t, individual) {
		igt_describe_f("%s for each legacy engine.", t->describe);
		igt_subtest_with_dynamic_f("legacy-%s", t->name) {
			for (const struct intel_execution_ring *l = intel_execution_rings;
			     l->name; l++) {
				igt_dynamic_f("%s", l->name) {
					t->func(fd, intel_ctx_0(fd), eb_ring(l),
						t->num_children, t->timeout);
				}
			}
		}
	}

	igt_describe("Basic test to wait upon a batch on all rings.");
	igt_subtest("basic-all")
		sync_all(fd, ctx, 1, 2);

	igt_describe("Basic version of store synchronisation test.");
	igt_subtest("basic-store-all")
		store_all(fd, ctx, 1, 2);

	igt_describe("Extended version of existing basic-all test.");
	igt_subtest("all")
		sync_all(fd, ctx, 1, 20);

	igt_describe("Extended version of existing basic-store-all test.");
	igt_subtest("store-all")
		store_all(fd, ctx, 1, 20);

	igt_describe("Parallel execution of batch on all rings and then wait.");
	igt_subtest("forked-all")
		sync_all(fd, ctx, ncpus, 20);

	igt_describe("Parallel execution of store synchronisation.");
	igt_subtest("forked-store-all")
		store_all(fd, ctx, ncpus, 20);

	for_each_test(t, all) {
		igt_describe_f("%s.", t->describe);
		igt_subtest_f("%s", t->name)
			t->func(fd, ctx, ALL_ENGINES, t->num_children, t->timeout);
	}
	/* New way of selecting engines. */
	for_each_test(t, individual) {
		igt_describe_f("%s on each engine.", t->describe);
		igt_subtest_with_dynamic_f("%s", t->name) {
			for_each_ctx_engine(fd, ctx, e) {
				igt_dynamic_f("%s", e->name) {
					t->func(fd, ctx, e->flags,
						t->num_children, t->timeout);
				}
			}
		}
	}

	igt_subtest_group {
		igt_fixture {
			gem_require_contexts(fd);
			igt_require(gem_scheduler_has_ctx_priority(fd));
			igt_require(gem_scheduler_has_preemption(fd));
		}

		igt_describe("Check and measure how well we can submit a second"
			     " high priority task when the engine is already"
			     " busy with a low priority task on all engines.");
		igt_subtest("preempt-all")
			preempt(fd, ctx, ALL_ENGINES, 1, 20);

		igt_describe("For each context engine, check how a high"
			     " priority task is handled when the engine is"
			     " already busy with a lower priority task.");
		igt_subtest_with_dynamic("preempt") {
			for_each_ctx_engine(fd, ctx, e) {
				igt_dynamic_f("%s", e->name)
					preempt(fd, ctx, e->flags, ncpus, 20);
			}
		}
	}

	igt_fixture {
		intel_allocator_multiprocess_stop();
		igt_stop_hang_detector();
		intel_ctx_destroy(fd, ctx);
		close(fd);
	}
}