diff options
author | Zbigniew Kempczyński <zbigniew.kempczynski@intel.com> | 2021-06-21 08:25:20 +0200 |
---|---|---|
committer | Zbigniew Kempczyński <zbigniew.kempczynski@intel.com> | 2021-08-18 05:23:59 +0200 |
commit | 146260200f9a6d4536e48a195e2ab49a07d4f0c1 (patch) | |
tree | c9b2e0e5a086fd816602749ea1f3e7e7c14af0a5 /tests/i915/gem_exec_schedule.c | |
parent | 0749e425aa523935b6f888e2ece195d1e25ccfaf (diff) |
tests/gem_exec_schedule: Adapt to use allocator
Alter tests to cover reloc and no-reloc (softpin) modes.
v2: fix pi-* subtests
v3: addressing review comments (Ashutosh)
Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
Cc: Petri Latvala <petri.latvala@intel.com>
Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Diffstat (limited to 'tests/i915/gem_exec_schedule.c')
-rw-r--r-- | tests/i915/gem_exec_schedule.c | 490 |
1 file changed, 386 insertions(+), 104 deletions(-)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c index e5fb4598..62d616f3 100644 --- a/tests/i915/gem_exec_schedule.c +++ b/tests/i915/gem_exec_schedule.c @@ -91,9 +91,11 @@ void __sync_read_u32_count(int fd, uint32_t handle, uint32_t *dst, uint64_t size gem_read(fd, handle, 0, dst, size); } -static uint32_t __store_dword(int fd, const intel_ctx_t *ctx, unsigned ring, - uint32_t target, uint32_t offset, uint32_t value, - uint32_t cork, int fence, unsigned write_domain) +static uint32_t __store_dword(int fd, uint64_t ahnd, const intel_ctx_t *ctx, + unsigned ring, uint32_t target, uint64_t target_offset, + uint32_t offset, uint32_t value, + uint32_t cork, uint64_t cork_offset, + int fence, unsigned write_domain) { const unsigned int gen = intel_gen(intel_get_drm_devid(fd)); struct drm_i915_gem_exec_object2 obj[3]; @@ -117,12 +119,23 @@ static uint32_t __store_dword(int fd, const intel_ctx_t *ctx, unsigned ring, memset(obj, 0, sizeof(obj)); obj[0].handle = cork; - obj[0].offset = cork << 20; obj[1].handle = target; - obj[1].offset = target << 20; obj[2].handle = gem_create(fd, 4096); - obj[2].offset = 256 << 10; - obj[2].offset += (random() % 128) << 12; + if (ahnd) { + obj[0].offset = cork_offset; + obj[0].flags |= EXEC_OBJECT_PINNED; + obj[1].offset = target_offset; + obj[1].flags |= EXEC_OBJECT_PINNED; + if (write_domain) + obj[1].flags |= EXEC_OBJECT_WRITE; + obj[2].offset = get_offset(ahnd, obj[2].handle, 4096, 0); + obj[2].flags |= EXEC_OBJECT_PINNED; + } else { + obj[0].offset = cork << 20; + obj[1].offset = target << 20; + obj[2].offset = 256 << 10; + obj[2].offset += (random() % 128) << 12; + } memset(&reloc, 0, sizeof(reloc)); reloc.target_handle = obj[1].handle; @@ -132,13 +145,13 @@ static uint32_t __store_dword(int fd, const intel_ctx_t *ctx, unsigned ring, reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION; reloc.write_domain = write_domain; obj[2].relocs_ptr = to_user_pointer(&reloc); - obj[2].relocation_count = 1; + 
obj[2].relocation_count = !ahnd ? 1 : 0; i = 0; batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); if (gen >= 8) { batch[++i] = reloc.presumed_offset + reloc.delta; - batch[++i] = 0; + batch[++i] = (reloc.presumed_offset + reloc.delta) >> 32; } else if (gen >= 4) { batch[++i] = 0; batch[++i] = reloc.presumed_offset + reloc.delta; @@ -155,31 +168,46 @@ static uint32_t __store_dword(int fd, const intel_ctx_t *ctx, unsigned ring, return obj[2].handle; } -static void store_dword(int fd, const intel_ctx_t *ctx, unsigned ring, - uint32_t target, uint32_t offset, uint32_t value, +static void store_dword(int fd, uint64_t ahnd, const intel_ctx_t *ctx, + unsigned ring, + uint32_t target, uint64_t target_offset, + uint32_t offset, uint32_t value, unsigned write_domain) { - gem_close(fd, __store_dword(fd, ctx, ring, - target, offset, value, - 0, -1, write_domain)); + uint32_t batch = __store_dword(fd, ahnd, ctx, ring, + target, target_offset, offset, value, + 0, 0, -1, write_domain); + gem_close(fd, batch); + put_offset(ahnd, batch); } -static void store_dword_plug(int fd, const intel_ctx_t *ctx, unsigned ring, - uint32_t target, uint32_t offset, uint32_t value, - uint32_t cork, unsigned write_domain) +static void store_dword_plug(int fd, uint64_t ahnd, const intel_ctx_t *ctx, + unsigned ring, + uint32_t target, uint64_t target_offset, + uint32_t offset, uint32_t value, + uint32_t cork, uint64_t cork_offset, + unsigned write_domain) { - gem_close(fd, __store_dword(fd, ctx, ring, - target, offset, value, - cork, -1, write_domain)); + uint32_t batch = __store_dword(fd, ahnd, ctx, ring, + target, target_offset, offset, value, + cork, cork_offset, -1, write_domain); + + gem_close(fd, batch); + put_offset(ahnd, batch); } -static void store_dword_fenced(int fd, const intel_ctx_t *ctx, unsigned ring, - uint32_t target, uint32_t offset, uint32_t value, +static void store_dword_fenced(int fd, uint64_t ahnd, const intel_ctx_t *ctx, + unsigned ring, + uint32_t target, uint64_t 
target_offset, + uint32_t offset, uint32_t value, int fence, unsigned write_domain) { - gem_close(fd, __store_dword(fd, ctx, ring, - target, offset, value, - 0, fence, write_domain)); + uint32_t batch = __store_dword(fd, ahnd, ctx, ring, + target, target_offset, offset, value, + 0, 0, fence, write_domain); + + gem_close(fd, batch); + put_offset(ahnd, batch); } static const intel_ctx_t * @@ -210,15 +238,21 @@ static void unplug_show_queue(int fd, struct igt_cork *c, for (int n = 0; n < max; n++) { const intel_ctx_t *ctx = create_highest_priority(fd, cfg); - spin[n] = __igt_spin_new(fd, .ctx = ctx, .engine = engine); + uint64_t ahnd = get_reloc_ahnd(fd, ctx->id); + + spin[n] = __igt_spin_new(fd, .ahnd = ahnd, .ctx = ctx, + .engine = engine); intel_ctx_destroy(fd, ctx); } igt_cork_unplug(c); /* batches will now be queued on the engine */ igt_debugfs_dump(fd, "i915_engine_info"); - for (int n = 0; n < max; n++) + for (int n = 0; n < max; n++) { + uint64_t ahnd = spin[n]->ahnd; igt_spin_free(fd, spin[n]); + put_ahnd(ahnd); + } } @@ -228,20 +262,26 @@ static void fifo(int fd, const intel_ctx_t *ctx, unsigned ring) uint32_t scratch; uint32_t result; int fence; + uint64_t ahnd = get_reloc_ahnd(fd, ctx->id), scratch_offset; scratch = gem_create(fd, 4096); + scratch_offset = get_offset(ahnd, scratch, 4096, 0); fence = igt_cork_plug(&cork, fd); /* Same priority, same timeline, final result will be the second eb */ - store_dword_fenced(fd, ctx, ring, scratch, 0, 1, fence, 0); - store_dword_fenced(fd, ctx, ring, scratch, 0, 2, fence, 0); + store_dword_fenced(fd, ahnd, ctx, ring, scratch, scratch_offset, + 0, 1, fence, 0); + store_dword_fenced(fd, ahnd, ctx, ring, scratch, scratch_offset, + 0, 2, fence, 0); unplug_show_queue(fd, &cork, &ctx->cfg, ring); close(fence); result = __sync_read_u32(fd, scratch, 0); gem_close(fd, scratch); + put_offset(ahnd, scratch); + put_ahnd(ahnd); igt_assert_eq_u32(result, 2); } @@ -260,6 +300,7 @@ static void implicit_rw(int i915, const 
intel_ctx_t *ctx, unsigned int ring, uint32_t scratch; uint32_t result; int fence; + uint64_t ahnd = get_reloc_ahnd(i915, ctx->id), scratch_offset; count = 0; for_each_ctx_engine(i915, ctx, e) { @@ -274,11 +315,12 @@ static void implicit_rw(int i915, const intel_ctx_t *ctx, unsigned int ring, igt_require(count); scratch = gem_create(i915, 4096); + scratch_offset = get_offset(ahnd, scratch, 4096, 0); fence = igt_cork_plug(&cork, i915); if (dir & WRITE_READ) - store_dword_fenced(i915, ctx, - ring, scratch, 0, ~ring, + store_dword_fenced(i915, ahnd, ctx, + ring, scratch, scratch_offset, 0, ~ring, fence, I915_GEM_DOMAIN_RENDER); for_each_ctx_engine(i915, ctx, e) { @@ -288,14 +330,14 @@ static void implicit_rw(int i915, const intel_ctx_t *ctx, unsigned int ring, if (!gem_class_can_store_dword(i915, e->class)) continue; - store_dword_fenced(i915, ctx, - e->flags, scratch, 0, e->flags, + store_dword_fenced(i915, ahnd, ctx, + e->flags, scratch, scratch_offset, 0, e->flags, fence, 0); } if (dir & READ_WRITE) - store_dword_fenced(i915, ctx, - ring, scratch, 0, ring, + store_dword_fenced(i915, ahnd, ctx, + ring, scratch, scratch_offset, 0, ring, fence, I915_GEM_DOMAIN_RENDER); unplug_show_queue(i915, &cork, &ctx->cfg, ring); @@ -303,6 +345,8 @@ static void implicit_rw(int i915, const intel_ctx_t *ctx, unsigned int ring, result = __sync_read_u32(i915, scratch, 0); gem_close(i915, scratch); + put_offset(ahnd, scratch); + put_ahnd(ahnd); if (dir & WRITE_READ) igt_assert_neq_u32(result, ~ring); @@ -319,8 +363,10 @@ static void independent(int fd, const intel_ctx_t *ctx, unsigned int engine, uint32_t scratch, batch; uint32_t *ptr; int fence; + uint64_t ahnd = get_reloc_ahnd(fd, ctx->id), scratch_offset; scratch = gem_create(fd, 4096); + scratch_offset = get_offset(ahnd, scratch, 4096, 0); ptr = gem_mmap__device_coherent(fd, scratch, 0, 4096, PROT_READ); igt_assert_eq(ptr[0], 0); @@ -336,6 +382,7 @@ static void independent(int fd, const intel_ctx_t *ctx, unsigned int engine, if 
(spin == NULL) { spin = __igt_spin_new(fd, + .ahnd = ahnd, .ctx = ctx, .engine = e->flags, .flags = flags); @@ -348,12 +395,15 @@ static void independent(int fd, const intel_ctx_t *ctx, unsigned int engine, gem_execbuf(fd, &eb); } - store_dword_fenced(fd, ctx, e->flags, scratch, 0, e->flags, fence, 0); + store_dword_fenced(fd, ahnd, ctx, e->flags, + scratch, scratch_offset, + 0, e->flags, fence, 0); } igt_require(spin); /* Same priority, but different timeline (as different engine) */ - batch = __store_dword(fd, ctx, engine, scratch, 0, engine, 0, fence, 0); + batch = __store_dword(fd, ahnd, ctx, engine, scratch, scratch_offset, + 0, engine, 0, 0, fence, 0); unplug_show_queue(fd, &cork, &ctx->cfg, engine); close(fence); @@ -369,6 +419,9 @@ static void independent(int fd, const intel_ctx_t *ctx, unsigned int engine, igt_spin_free(fd, spin); gem_quiescent_gpu(fd); + put_offset(ahnd, batch); + put_offset(ahnd, scratch); + put_ahnd(ahnd); /* And we expect the others to have overwritten us, order unspecified */ igt_assert(!gem_bo_busy(fd, scratch)); @@ -388,6 +441,7 @@ static void smoketest(int fd, const intel_ctx_cfg_t *cfg, unsigned engine; uint32_t scratch; uint32_t result[2 * ncpus]; + uint64_t scratch_offset; nengine = 0; if (ring == ALL_ENGINES) { @@ -400,13 +454,19 @@ static void smoketest(int fd, const intel_ctx_cfg_t *cfg, igt_require(nengine); scratch = gem_create(fd, 4096); + igt_fork(child, ncpus) { unsigned long count = 0; const intel_ctx_t *ctx; + uint64_t ahnd; + + intel_allocator_init(); hars_petruska_f54_1_random_perturb(child); ctx = intel_ctx_create(fd, cfg); + ahnd = get_reloc_ahnd(fd, ctx->id); + scratch_offset = get_offset(ahnd, scratch, 4096, 0); igt_until_timeout(timeout) { int prio; @@ -414,15 +474,18 @@ static void smoketest(int fd, const intel_ctx_cfg_t *cfg, gem_context_set_priority(fd, ctx->id, prio); engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)]; - store_dword(fd, ctx, engine, scratch, - 8*child + 0, ~child, - 0); + 
store_dword(fd, ahnd, ctx, engine, + scratch, scratch_offset, + 8*child + 0, ~child, 0); for (unsigned int step = 0; step < 8; step++) - store_dword(fd, ctx, engine, scratch, + store_dword(fd, ahnd, ctx, engine, + scratch, scratch_offset, 8*child + 4, count++, 0); } intel_ctx_destroy(fd, ctx); + put_offset(ahnd, scratch); + put_ahnd(ahnd); } igt_waitchildren(); @@ -644,12 +707,15 @@ static void lateslice(int i915, const intel_ctx_cfg_t *cfg, { const intel_ctx_t *ctx; igt_spin_t *spin[3]; + uint64_t ahnd[3]; igt_require(gem_scheduler_has_timeslicing(i915)); igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8); ctx = intel_ctx_create(i915, cfg); - spin[0] = igt_spin_new(i915, .ctx = ctx, .engine = engine, + ahnd[0] = get_reloc_ahnd(i915, ctx->id); + spin[0] = igt_spin_new(i915, .ahnd = ahnd[0], .ctx = ctx, + .engine = engine, .flags = (IGT_SPIN_POLL_RUN | IGT_SPIN_FENCE_OUT | flags)); @@ -658,7 +724,9 @@ static void lateslice(int i915, const intel_ctx_cfg_t *cfg, igt_spin_busywait_until_started(spin[0]); ctx = intel_ctx_create(i915, cfg); - spin[1] = igt_spin_new(i915, .ctx = ctx, .engine = engine, + ahnd[1] = get_reloc_ahnd(i915, ctx->id); + spin[1] = igt_spin_new(i915, .ahnd = ahnd[1], .ctx = ctx, + .engine = engine, .fence = spin[0]->out_fence, .flags = (IGT_SPIN_POLL_RUN | IGT_SPIN_FENCE_IN | @@ -675,7 +743,9 @@ static void lateslice(int i915, const intel_ctx_cfg_t *cfg, */ ctx = intel_ctx_create(i915, cfg); - spin[2] = igt_spin_new(i915, .ctx = ctx, .engine = engine, + ahnd[2] = get_reloc_ahnd(i915, ctx->id); + spin[2] = igt_spin_new(i915, .ahnd = ahnd[2], .ctx = ctx, + .engine = engine, .flags = IGT_SPIN_POLL_RUN | flags); intel_ctx_destroy(i915, ctx); @@ -696,6 +766,9 @@ static void lateslice(int i915, const intel_ctx_cfg_t *cfg, igt_assert(gem_bo_busy(i915, spin[1]->handle)); igt_spin_free(i915, spin[1]); + + for (int i = 0; i < ARRAY_SIZE(ahnd); i++) + put_ahnd(ahnd[i]); } static void cancel_spinner(int i915, @@ -742,6 +815,7 @@ static void 
submit_slice(int i915, const intel_ctx_cfg_t *cfg, .num_engines = 1, }; const intel_ctx_t *ctx; + uint64_t ahnd0 = get_reloc_ahnd(i915, 0); /* * When using a submit fence, we do not want to block concurrent work, @@ -755,13 +829,14 @@ static void submit_slice(int i915, const intel_ctx_cfg_t *cfg, igt_spin_t *bg, *spin; int timeline = -1; int fence = -1; + uint64_t ahndN; if (!gem_class_can_store_dword(i915, cancel->class)) continue; igt_debug("Testing cancellation from %s\n", e->name); - bg = igt_spin_new(i915, .engine = e->flags); + bg = igt_spin_new(i915, .ahnd = ahnd0, .engine = e->flags); if (flags & LATE_SUBMIT) { timeline = sw_sync_timeline_create(); @@ -771,7 +846,8 @@ static void submit_slice(int i915, const intel_ctx_cfg_t *cfg, engine_cfg.engines[0].engine_class = e->class; engine_cfg.engines[0].engine_instance = e->instance; ctx = intel_ctx_create(i915, &engine_cfg); - spin = igt_spin_new(i915, .ctx = ctx, + ahndN = get_reloc_ahnd(i915, ctx->id); + spin = igt_spin_new(i915, .ahnd = ahndN, .ctx = ctx, .fence = fence, .flags = IGT_SPIN_POLL_RUN | @@ -800,7 +876,10 @@ static void submit_slice(int i915, const intel_ctx_cfg_t *cfg, igt_spin_free(i915, bg); intel_ctx_destroy(i915, ctx); + put_ahnd(ahndN); } + + put_ahnd(ahnd0); } static uint32_t __batch_create(int i915, uint32_t offset) @@ -829,6 +908,7 @@ static void semaphore_userlock(int i915, const intel_ctx_t *ctx, igt_spin_t *spin = NULL; uint32_t scratch; const intel_ctx_t *tmp_ctx; + uint64_t ahnd = get_reloc_ahnd(i915, ctx->id); igt_require(gem_scheduler_has_timeslicing(i915)); @@ -843,6 +923,7 @@ static void semaphore_userlock(int i915, const intel_ctx_t *ctx, for_each_ctx_engine(i915, ctx, e) { if (!spin) { spin = igt_spin_new(i915, + .ahnd = ahnd, .ctx = ctx, .dependency = scratch, .engine = e->flags, @@ -885,6 +966,7 @@ static void semaphore_userlock(int i915, const intel_ctx_t *ctx, gem_close(i915, obj.handle); igt_spin_free(i915, spin); + put_ahnd(ahnd); } static void semaphore_codependency(int 
i915, const intel_ctx_t *ctx, @@ -894,6 +976,7 @@ static void semaphore_codependency(int i915, const intel_ctx_t *ctx, struct { igt_spin_t *xcs, *rcs; } task[2]; + uint64_t ahnd; int i; /* @@ -919,9 +1002,11 @@ static void semaphore_codependency(int i915, const intel_ctx_t *ctx, continue; tmp_ctx = intel_ctx_create(i915, &ctx->cfg); + ahnd = get_simple_l2h_ahnd(i915, tmp_ctx->id); task[i].xcs = __igt_spin_new(i915, + .ahnd = ahnd, .ctx = tmp_ctx, .engine = e->flags, .flags = IGT_SPIN_POLL_RUN | flags); @@ -930,6 +1015,7 @@ static void semaphore_codependency(int i915, const intel_ctx_t *ctx, /* Common rcs tasks will be queued in FIFO */ task[i].rcs = __igt_spin_new(i915, + .ahnd = ahnd, .ctx = tmp_ctx, .engine = 0, .dependency = task[i].xcs->handle); @@ -952,8 +1038,10 @@ static void semaphore_codependency(int i915, const intel_ctx_t *ctx, } for (i = 0; i < ARRAY_SIZE(task); i++) { + ahnd = task[i].rcs->ahnd; igt_spin_free(i915, task[i].xcs); igt_spin_free(i915, task[i].rcs); + put_ahnd(ahnd); } } @@ -964,6 +1052,7 @@ static void semaphore_resolve(int i915, const intel_ctx_cfg_t *cfg, const uint32_t SEMAPHORE_ADDR = 64 << 10; uint32_t semaphore, *sema; const intel_ctx_t *outer, *inner; + uint64_t ahnd = get_reloc_ahnd(i915, 0); /* * Userspace may submit batches that wait upon unresolved @@ -994,7 +1083,8 @@ static void semaphore_resolve(int i915, const intel_ctx_cfg_t *cfg, if (!gem_class_can_store_dword(i915, e->class)) continue; - spin = __igt_spin_new(i915, .engine = e->flags, .flags = flags); + spin = __igt_spin_new(i915, .ahnd = ahnd, + .engine = e->flags, .flags = flags); igt_spin_end(spin); /* we just want its address for later */ gem_sync(i915, spin->handle); igt_spin_reset(spin); @@ -1086,6 +1176,7 @@ static void semaphore_resolve(int i915, const intel_ctx_cfg_t *cfg, intel_ctx_destroy(i915, inner); intel_ctx_destroy(i915, outer); + put_ahnd(ahnd); } static void semaphore_noskip(int i915, const intel_ctx_cfg_t *cfg, @@ -1094,10 +1185,12 @@ static void 
semaphore_noskip(int i915, const intel_ctx_cfg_t *cfg, const unsigned int gen = intel_gen(intel_get_drm_devid(i915)); const struct intel_execution_engine2 *outer, *inner; const intel_ctx_t *ctx; + uint64_t ahnd; igt_require(gen >= 6); /* MI_STORE_DWORD_IMM convenience */ ctx = intel_ctx_create(i915, cfg); + ahnd = get_reloc_ahnd(i915, ctx->id); for_each_ctx_engine(i915, ctx, outer) { for_each_ctx_engine(i915, ctx, inner) { @@ -1110,10 +1203,10 @@ static void semaphore_noskip(int i915, const intel_ctx_cfg_t *cfg, !gem_class_can_store_dword(i915, inner->class)) continue; - chain = __igt_spin_new(i915, .ctx = ctx, + chain = __igt_spin_new(i915, .ahnd = ahnd, .ctx = ctx, .engine = outer->flags, .flags = flags); - spin = __igt_spin_new(i915, .ctx = ctx, + spin = __igt_spin_new(i915, .ahnd = ahnd, .ctx = ctx, .engine = inner->flags, .flags = flags); igt_spin_end(spin); /* we just want its address for later */ gem_sync(i915, spin->handle); @@ -1172,6 +1265,7 @@ static void semaphore_noskip(int i915, const intel_ctx_cfg_t *cfg, } intel_ctx_destroy(i915, ctx); + put_ahnd(ahnd); } static void @@ -1197,6 +1291,7 @@ noreorder(int i915, const intel_ctx_cfg_t *cfg, igt_spin_t *spin; int fence = -1; uint64_t addr; + uint64_t ahnd[2]; if (flags & CORKED) fence = igt_cork_plug(&cork, i915); @@ -1205,8 +1300,9 @@ noreorder(int i915, const intel_ctx_cfg_t *cfg, vm_cfg.vm = gem_vm_create(i915); ctx = intel_ctx_create(i915, &vm_cfg); + ahnd[0] = get_reloc_ahnd(i915, ctx->id); - spin = igt_spin_new(i915, .ctx = ctx, + spin = igt_spin_new(i915, .ahnd = ahnd[0], .ctx = ctx, .engine = engine, .fence = fence, .flags = IGT_SPIN_FENCE_OUT | IGT_SPIN_FENCE_IN); @@ -1281,7 +1377,9 @@ noreorder(int i915, const intel_ctx_cfg_t *cfg, * Without timeslices, fallback to waiting a second. 
*/ ctx = intel_ctx_create(i915, &vm_cfg); + ahnd[1] = get_reloc_ahnd(i915, ctx->id); slice = igt_spin_new(i915, + .ahnd = ahnd[1], .ctx = ctx, .engine = engine, .flags = IGT_SPIN_POLL_RUN); @@ -1299,6 +1397,8 @@ noreorder(int i915, const intel_ctx_cfg_t *cfg, igt_assert_eq(sync_fence_status(spin->out_fence), 0); igt_spin_free(i915, spin); gem_quiescent_gpu(i915); + put_ahnd(ahnd[0]); + put_ahnd(ahnd[1]); } static void reorder(int fd, const intel_ctx_cfg_t *cfg, @@ -1310,6 +1410,14 @@ static void reorder(int fd, const intel_ctx_cfg_t *cfg, uint32_t result; const intel_ctx_t *ctx[2]; int fence; + uint64_t ahnd, scratch_offset; + + /* + * We use reloc ahnd for default context because we're interested + * acquiring distinct offsets only. This saves us typing - otherwise + * we should get scratch_offset for each context separately. + */ + ahnd = get_reloc_ahnd(fd, 0); ctx[LO] = intel_ctx_create(fd, cfg); gem_context_set_priority(fd, ctx[LO]->id, MIN_PRIO); @@ -1318,19 +1426,25 @@ static void reorder(int fd, const intel_ctx_cfg_t *cfg, gem_context_set_priority(fd, ctx[HI]->id, flags & EQUAL ? MIN_PRIO : 0); scratch = gem_create(fd, 4096); + scratch_offset = get_offset(ahnd, scratch, 4096, 0); + fence = igt_cork_plug(&cork, fd); /* We expect the high priority context to be executed first, and * so the final result will be value from the low priority context. 
*/ - store_dword_fenced(fd, ctx[LO], ring, scratch, 0, ctx[LO]->id, fence, 0); - store_dword_fenced(fd, ctx[HI], ring, scratch, 0, ctx[HI]->id, fence, 0); + store_dword_fenced(fd, ahnd, ctx[LO], ring, scratch, scratch_offset, + 0, ctx[LO]->id, fence, 0); + store_dword_fenced(fd, ahnd, ctx[HI], ring, scratch, scratch_offset, + 0, ctx[HI]->id, fence, 0); unplug_show_queue(fd, &cork, cfg, ring); close(fence); result = __sync_read_u32(fd, scratch, 0); gem_close(fd, scratch); + put_offset(ahnd, scratch); + put_ahnd(ahnd); if (flags & EQUAL) /* equal priority, result will be fifo */ igt_assert_eq_u32(result, ctx[HI]->id); @@ -1348,6 +1462,7 @@ static void promotion(int fd, const intel_ctx_cfg_t *cfg, unsigned ring) uint32_t result_read, dep_read; const intel_ctx_t *ctx[3]; int fence; + uint64_t ahnd = get_reloc_ahnd(fd, 0), result_offset, dep_offset; ctx[LO] = intel_ctx_create(fd, cfg); gem_context_set_priority(fd, ctx[LO]->id, MIN_PRIO); @@ -1359,7 +1474,9 @@ static void promotion(int fd, const intel_ctx_cfg_t *cfg, unsigned ring) gem_context_set_priority(fd, ctx[NOISE]->id, MIN_PRIO/2); result = gem_create(fd, 4096); + result_offset = get_offset(ahnd, result, 4096, 0); dep = gem_create(fd, 4096); + dep_offset = get_offset(ahnd, dep, 4096, 0); fence = igt_cork_plug(&cork, fd); @@ -1368,14 +1485,19 @@ static void promotion(int fd, const intel_ctx_cfg_t *cfg, unsigned ring) * fifo would be NOISE, LO, HI. 
* strict priority would be HI, NOISE, LO */ - store_dword_fenced(fd, ctx[NOISE], ring, result, 0, ctx[NOISE]->id, fence, 0); - store_dword_fenced(fd, ctx[LO], ring, result, 0, ctx[LO]->id, fence, 0); + store_dword_fenced(fd, ahnd, ctx[NOISE], ring, result, result_offset, + 0, ctx[NOISE]->id, fence, 0); + store_dword_fenced(fd, ahnd, ctx[LO], ring, result, result_offset, + 0, ctx[LO]->id, fence, 0); /* link LO <-> HI via a dependency on another buffer */ - store_dword(fd, ctx[LO], ring, dep, 0, ctx[LO]->id, I915_GEM_DOMAIN_INSTRUCTION); - store_dword(fd, ctx[HI], ring, dep, 0, ctx[HI]->id, 0); + store_dword(fd, ahnd, ctx[LO], ring, dep, dep_offset, + 0, ctx[LO]->id, I915_GEM_DOMAIN_INSTRUCTION); + store_dword(fd, ahnd, ctx[HI], ring, dep, dep_offset, + 0, ctx[HI]->id, 0); - store_dword(fd, ctx[HI], ring, result, 0, ctx[HI]->id, 0); + store_dword(fd, ahnd, ctx[HI], ring, result, result_offset, + 0, ctx[HI]->id, 0); unplug_show_queue(fd, &cork, cfg, ring); close(fence); @@ -1385,6 +1507,9 @@ static void promotion(int fd, const intel_ctx_cfg_t *cfg, unsigned ring) result_read = __sync_read_u32(fd, result, 0); gem_close(fd, result); + put_offset(ahnd, result); + put_offset(ahnd, dep); + put_ahnd(ahnd); igt_assert_eq_u32(dep_read, ctx[HI]->id); igt_assert_eq_u32(result_read, ctx[NOISE]->id); @@ -1413,32 +1538,42 @@ static void preempt(int fd, const intel_ctx_cfg_t *cfg, igt_spin_t *spin[MAX_ELSP_QLEN]; const intel_ctx_t *ctx[2]; igt_hang_t hang; + uint64_t ahnd = get_reloc_ahnd(fd, 0); + uint64_t ahnd_lo_arr[MAX_ELSP_QLEN], ahnd_lo; + uint64_t result_offset = get_offset(ahnd, result, 4096, 0); /* Set a fast timeout to speed the test up (if available) */ set_preempt_timeout(fd, e, 150); ctx[LO] = intel_ctx_create(fd, cfg); gem_context_set_priority(fd, ctx[LO]->id, MIN_PRIO); + ahnd_lo = get_reloc_ahnd(fd, ctx[LO]->id); ctx[HI] = intel_ctx_create(fd, cfg); gem_context_set_priority(fd, ctx[HI]->id, MAX_PRIO); if (flags & HANG_LP) - hang = igt_hang_ctx(fd, ctx[LO]->id, 
e->flags, 0); + hang = igt_hang_ctx_with_ahnd(fd, ahnd_lo, ctx[LO]->id, e->flags, 0); for (int n = 0; n < ARRAY_SIZE(spin); n++) { + uint64_t currahnd = ahnd_lo; + if (flags & NEW_CTX) { intel_ctx_destroy(fd, ctx[LO]); ctx[LO] = intel_ctx_create(fd, cfg); gem_context_set_priority(fd, ctx[LO]->id, MIN_PRIO); + ahnd_lo_arr[n] = get_reloc_ahnd(fd, ctx[LO]->id); + currahnd = ahnd_lo_arr[n]; } spin[n] = __igt_spin_new(fd, + .ahnd = currahnd, .ctx = ctx[LO], .engine = e->flags, .flags = flags & USERPTR ? IGT_SPIN_USERPTR : 0); igt_debug("spin[%d].handle=%d\n", n, spin[n]->handle); - store_dword(fd, ctx[HI], e->flags, result, 0, n + 1, I915_GEM_DOMAIN_RENDER); + store_dword(fd, ahnd, ctx[HI], e->flags, result, result_offset, + 0, n + 1, I915_GEM_DOMAIN_RENDER); result_read = __sync_read_u32(fd, result, 0); igt_assert_eq_u32(result_read, n + 1); @@ -1453,6 +1588,13 @@ static void preempt(int fd, const intel_ctx_cfg_t *cfg, intel_ctx_destroy(fd, ctx[LO]); intel_ctx_destroy(fd, ctx[HI]); + put_ahnd(ahnd); + put_ahnd(ahnd_lo); + + if (flags & NEW_CTX) { + for (int n = 0; n < ARRAY_SIZE(spin); n++) + put_ahnd(ahnd_lo_arr[n]); + } gem_close(fd, result); } @@ -1460,7 +1602,7 @@ static void preempt(int fd, const intel_ctx_cfg_t *cfg, #define CHAIN 0x1 #define CONTEXTS 0x2 -static igt_spin_t *__noise(int fd, const intel_ctx_t *ctx, +static igt_spin_t *__noise(int fd, uint64_t ahnd, const intel_ctx_t *ctx, int prio, igt_spin_t *spin) { const struct intel_execution_engine2 *e; @@ -1470,6 +1612,7 @@ static igt_spin_t *__noise(int fd, const intel_ctx_t *ctx, for_each_ctx_engine(fd, ctx, e) { if (spin == NULL) { spin = __igt_spin_new(fd, + .ahnd = ahnd, .ctx = ctx, .engine = e->flags); } else { @@ -1487,6 +1630,7 @@ static igt_spin_t *__noise(int fd, const intel_ctx_t *ctx, } static void __preempt_other(int fd, + uint64_t *ahnd, const intel_ctx_t **ctx, unsigned int target, unsigned int primary, unsigned flags) @@ -1495,24 +1639,27 @@ static void __preempt_other(int fd, uint32_t result 
= gem_create(fd, 4096); uint32_t result_read[4096 / sizeof(uint32_t)]; unsigned int n, i; + uint64_t result_offset_lo = get_offset(ahnd[LO], result, 4096, 0); + uint64_t result_offset_hi = get_offset(ahnd[HI], result, 4096, 0); n = 0; - store_dword(fd, ctx[LO], primary, - result, (n + 1)*sizeof(uint32_t), n + 1, + store_dword(fd, ahnd[LO], ctx[LO], primary, + result, result_offset_lo, (n + 1)*sizeof(uint32_t), n + 1, I915_GEM_DOMAIN_RENDER); n++; if (flags & CHAIN) { for_each_ctx_engine(fd, ctx[LO], e) { - store_dword(fd, ctx[LO], e->flags, - result, (n + 1)*sizeof(uint32_t), n + 1, + store_dword(fd, ahnd[LO], ctx[LO], e->flags, + result, result_offset_lo, + (n + 1)*sizeof(uint32_t), n + 1, I915_GEM_DOMAIN_RENDER); n++; } } - store_dword(fd, ctx[HI], target, - result, (n + 1)*sizeof(uint32_t), n + 1, + store_dword(fd, ahnd[HI], ctx[HI], target, + result, result_offset_hi, (n + 1)*sizeof(uint32_t), n + 1, I915_GEM_DOMAIN_RENDER); igt_debugfs_dump(fd, "i915_engine_info"); @@ -1525,6 +1672,8 @@ static void __preempt_other(int fd, igt_assert_eq_u32(result_read[i], i); gem_close(fd, result); + put_offset(ahnd[LO], result); + put_offset(ahnd[HI], result); } static void preempt_other(int fd, const intel_ctx_cfg_t *cfg, @@ -1533,6 +1682,7 @@ static void preempt_other(int fd, const intel_ctx_cfg_t *cfg, const struct intel_execution_engine2 *e; igt_spin_t *spin = NULL; const intel_ctx_t *ctx[3]; + uint64_t ahnd[3]; /* On each engine, insert * [NOISE] spinner, @@ -1546,16 +1696,19 @@ static void preempt_other(int fd, const intel_ctx_cfg_t *cfg, ctx[LO] = intel_ctx_create(fd, cfg); gem_context_set_priority(fd, ctx[LO]->id, MIN_PRIO); + ahnd[LO] = get_reloc_ahnd(fd, ctx[LO]->id); ctx[NOISE] = intel_ctx_create(fd, cfg); - spin = __noise(fd, ctx[NOISE], 0, NULL); + ahnd[NOISE] = get_reloc_ahnd(fd, ctx[NOISE]->id); + spin = __noise(fd, ahnd[NOISE], ctx[NOISE], 0, NULL); ctx[HI] = intel_ctx_create(fd, cfg); gem_context_set_priority(fd, ctx[HI]->id, MAX_PRIO); + ahnd[HI] = 
get_reloc_ahnd(fd, ctx[HI]->id); for_each_ctx_cfg_engine(fd, cfg, e) { igt_debug("Primary engine: %s\n", e->name); - __preempt_other(fd, ctx, ring, e->flags, flags); + __preempt_other(fd, ahnd, ctx, ring, e->flags, flags); } @@ -1565,6 +1718,9 @@ static void preempt_other(int fd, const intel_ctx_cfg_t *cfg, intel_ctx_destroy(fd, ctx[LO]); intel_ctx_destroy(fd, ctx[NOISE]); intel_ctx_destroy(fd, ctx[HI]); + put_ahnd(ahnd[LO]); + put_ahnd(ahnd[NOISE]); + put_ahnd(ahnd[HI]); } static void __preempt_queue(int fd, const intel_ctx_cfg_t *cfg, @@ -1574,12 +1730,18 @@ static void __preempt_queue(int fd, const intel_ctx_cfg_t *cfg, const struct intel_execution_engine2 *e; uint32_t result = gem_create(fd, 4096); uint32_t result_read[4096 / sizeof(uint32_t)]; + uint64_t result_offset; igt_spin_t *above = NULL, *below = NULL; const intel_ctx_t *ctx[3] = { intel_ctx_create(fd, cfg), intel_ctx_create(fd, cfg), intel_ctx_create(fd, cfg), }; + uint64_t ahnd[3] = { + get_reloc_ahnd(fd, ctx[0]->id), + get_reloc_ahnd(fd, ctx[1]->id), + get_reloc_ahnd(fd, ctx[2]->id), + }; int prio = MAX_PRIO; unsigned int n, i; @@ -1588,7 +1750,7 @@ static void __preempt_queue(int fd, const intel_ctx_cfg_t *cfg, intel_ctx_destroy(fd, ctx[NOISE]); ctx[NOISE] = intel_ctx_create(fd, cfg); } - above = __noise(fd, ctx[NOISE], prio--, above); + above = __noise(fd, ahnd[NOISE], ctx[NOISE], prio--, above); } gem_context_set_priority(fd, ctx[HI]->id, prio--); @@ -1598,28 +1760,31 @@ static void __preempt_queue(int fd, const intel_ctx_cfg_t *cfg, intel_ctx_destroy(fd, ctx[NOISE]); ctx[NOISE] = intel_ctx_create(fd, cfg); } - below = __noise(fd, ctx[NOISE], prio--, below); + below = __noise(fd, ahnd[NOISE], ctx[NOISE], prio--, below); } gem_context_set_priority(fd, ctx[LO]->id, prio--); n = 0; - store_dword(fd, ctx[LO], primary, - result, (n + 1)*sizeof(uint32_t), n + 1, + result_offset = get_offset(ahnd[LO], result, 4096, 0); + store_dword(fd, ahnd[LO], ctx[LO], primary, + result, result_offset, (n + 
1)*sizeof(uint32_t), n + 1, I915_GEM_DOMAIN_RENDER); n++; if (flags & CHAIN) { for_each_ctx_engine(fd, ctx[LO], e) { - store_dword(fd, ctx[LO], e->flags, - result, (n + 1)*sizeof(uint32_t), n + 1, + store_dword(fd, ahnd[LO], ctx[LO], e->flags, + result, result_offset, + (n + 1)*sizeof(uint32_t), n + 1, I915_GEM_DOMAIN_RENDER); n++; } } - store_dword(fd, ctx[HI], target, - result, (n + 1)*sizeof(uint32_t), n + 1, + result_offset = get_offset(ahnd[HI], result, 4096, 0); + store_dword(fd, ahnd[HI], ctx[HI], target, + result, result_offset, (n + 1)*sizeof(uint32_t), n + 1, I915_GEM_DOMAIN_RENDER); igt_debugfs_dump(fd, "i915_engine_info"); @@ -1647,6 +1812,11 @@ static void __preempt_queue(int fd, const intel_ctx_cfg_t *cfg, intel_ctx_destroy(fd, ctx[HI]); gem_close(fd, result); + put_offset(ahnd[LO], result); + put_offset(ahnd[HI], result); + put_ahnd(ahnd[LO]); + put_ahnd(ahnd[NOISE]); + put_ahnd(ahnd[HI]); } static void preempt_queue(int fd, const intel_ctx_cfg_t *cfg, @@ -1679,6 +1849,7 @@ static void preempt_engines(int i915, IGT_LIST_HEAD(plist); igt_spin_t *spin, *sn; const intel_ctx_t *ctx; + uint64_t ahnd; /* * A quick test that each engine within a context is an independent @@ -1694,12 +1865,14 @@ static void preempt_engines(int i915, igt_list_add(&pnode[n].link, &plist); } ctx = intel_ctx_create(i915, &cfg); + ahnd = get_reloc_ahnd(i915, ctx->id); for (int n = -(GEM_MAX_ENGINES - 1); n < GEM_MAX_ENGINES; n++) { unsigned int engine = n & I915_EXEC_RING_MASK; gem_context_set_priority(i915, ctx->id, n); - spin = igt_spin_new(i915, .ctx = ctx, .engine = engine); + spin = igt_spin_new(i915, .ahnd = ahnd, .ctx = ctx, + .engine = engine); igt_list_move_tail(&spin->link, &pnode[engine].spinners); igt_list_move(&pnode[engine].link, &plist); @@ -1713,6 +1886,7 @@ static void preempt_engines(int i915, } } intel_ctx_destroy(i915, ctx); + put_ahnd(ahnd); } static void preempt_self(int fd, const intel_ctx_cfg_t *cfg, @@ -1724,6 +1898,7 @@ static void preempt_self(int fd, 
const intel_ctx_cfg_t *cfg, igt_spin_t *spin[MAX_ELSP_QLEN]; unsigned int n, i; const intel_ctx_t *ctx[3]; + uint64_t ahnd[3], result_offset; /* On each engine, insert * [NOISE] spinner, @@ -1735,21 +1910,26 @@ static void preempt_self(int fd, const intel_ctx_cfg_t *cfg, ctx[NOISE] = intel_ctx_create(fd, cfg); ctx[HI] = intel_ctx_create(fd, cfg); + ahnd[NOISE] = get_reloc_ahnd(fd, ctx[NOISE]->id); + ahnd[HI] = get_reloc_ahnd(fd, ctx[HI]->id); + result_offset = get_offset(ahnd[HI], result, 4096, 0); n = 0; gem_context_set_priority(fd, ctx[HI]->id, MIN_PRIO); for_each_ctx_cfg_engine(fd, cfg, e) { spin[n] = __igt_spin_new(fd, + .ahnd = ahnd[NOISE], .ctx = ctx[NOISE], .engine = e->flags); - store_dword(fd, ctx[HI], e->flags, - result, (n + 1)*sizeof(uint32_t), n + 1, + store_dword(fd, ahnd[HI], ctx[HI], e->flags, + result, result_offset, + (n + 1)*sizeof(uint32_t), n + 1, I915_GEM_DOMAIN_RENDER); n++; } gem_context_set_priority(fd, ctx[HI]->id, MAX_PRIO); - store_dword(fd, ctx[HI], ring, - result, (n + 1)*sizeof(uint32_t), n + 1, + store_dword(fd, ahnd[HI], ctx[HI], ring, + result, result_offset, (n + 1)*sizeof(uint32_t), n + 1, I915_GEM_DOMAIN_RENDER); gem_set_domain(fd, result, I915_GEM_DOMAIN_GTT, 0); @@ -1769,6 +1949,9 @@ static void preempt_self(int fd, const intel_ctx_cfg_t *cfg, intel_ctx_destroy(fd, ctx[HI]); gem_close(fd, result); + put_offset(ahnd[HI], result); + put_ahnd(ahnd[NOISE]); + put_ahnd(ahnd[HI]); } static void preemptive_hang(int fd, const intel_ctx_cfg_t *cfg, @@ -1777,25 +1960,29 @@ static void preemptive_hang(int fd, const intel_ctx_cfg_t *cfg, igt_spin_t *spin[MAX_ELSP_QLEN]; igt_hang_t hang; const intel_ctx_t *ctx[2]; + uint64_t ahnd_hi, ahnd_lo; /* Set a fast timeout to speed the test up (if available) */ set_preempt_timeout(fd, e, 150); ctx[HI] = intel_ctx_create(fd, cfg); gem_context_set_priority(fd, ctx[HI]->id, MAX_PRIO); + ahnd_hi = get_reloc_ahnd(fd, ctx[HI]->id); for (int n = 0; n < ARRAY_SIZE(spin); n++) { ctx[LO] = 
intel_ctx_create(fd, cfg); gem_context_set_priority(fd, ctx[LO]->id, MIN_PRIO); + ahnd_lo = get_reloc_ahnd(fd, ctx[LO]->id); spin[n] = __igt_spin_new(fd, + .ahnd = ahnd_lo, .ctx = ctx[LO], .engine = e->flags); intel_ctx_destroy(fd, ctx[LO]); } - hang = igt_hang_ctx(fd, ctx[HI]->id, e->flags, 0); + hang = igt_hang_ctx_with_ahnd(fd, ahnd_hi, ctx[HI]->id, e->flags, 0); igt_post_hang_ring(fd, hang); for (int n = 0; n < ARRAY_SIZE(spin); n++) { @@ -1803,11 +1990,14 @@ static void preemptive_hang(int fd, const intel_ctx_cfg_t *cfg, * This is subject to change as the scheduler evolve. The test should * be updated to reflect such changes. */ + ahnd_lo = spin[n]->ahnd; igt_assert(gem_bo_busy(fd, spin[n]->handle)); igt_spin_free(fd, spin[n]); + put_ahnd(ahnd_lo); } intel_ctx_destroy(fd, ctx[HI]); + put_ahnd(ahnd_hi); } static void deep(int fd, const intel_ctx_cfg_t *cfg, @@ -1823,6 +2013,8 @@ static void deep(int fd, const intel_ctx_cfg_t *cfg, uint32_t result, dep[XS]; uint32_t read_buf[size / sizeof(uint32_t)]; uint32_t expected = 0; + uint64_t ahnd = get_reloc_ahnd(fd, 0); + uint64_t result_offset, dep_offset[XS], plug_offset; const intel_ctx_t **ctx; int dep_nreq; int n; @@ -1838,6 +2030,7 @@ static void deep(int fd, const intel_ctx_cfg_t *cfg, igt_info("Using %d requests (prio range %d)\n", nreq, max_req); result = gem_create(fd, size); + result_offset = get_offset(ahnd, result, size, 0); for (int m = 0; m < XS; m ++) dep[m] = gem_create(fd, size); @@ -1848,10 +2041,23 @@ static void deep(int fd, const intel_ctx_cfg_t *cfg, const uint32_t bbe = MI_BATCH_BUFFER_END; memset(obj, 0, sizeof(obj)); - for (n = 0; n < XS; n++) + for (n = 0; n < XS; n++) { obj[n].handle = dep[n]; + if (ahnd) { + obj[n].offset = get_offset(ahnd, obj[n].handle, + size, 0); + dep_offset[n] = obj[n].offset; + obj[n].flags |= EXEC_OBJECT_PINNED; + } + } obj[XS].handle = result; + obj[XS].offset = result_offset; obj[XS+1].handle = gem_create(fd, 4096); + obj[XS+1].offset = get_offset(ahnd, 
obj[XS+1].handle, 4096, 0); + if (ahnd) { + obj[XS].flags |= EXEC_OBJECT_PINNED; + obj[XS+1].flags |= EXEC_OBJECT_PINNED; + } gem_write(fd, obj[XS+1].handle, 0, &bbe, sizeof(bbe)); memset(&execbuf, 0, sizeof(execbuf)); @@ -1867,6 +2073,7 @@ static void deep(int fd, const intel_ctx_cfg_t *cfg, } plug = igt_cork_plug(&cork, fd); + plug_offset = get_offset(ahnd, plug, 4096, 0); /* Create a deep dependency chain, with a few branches */ for (n = 0; n < nreq && igt_seconds_elapsed(&tv) < 2; n++) { @@ -1874,7 +2081,10 @@ static void deep(int fd, const intel_ctx_cfg_t *cfg, gem_context_set_priority(fd, context->id, MAX_PRIO - nreq + n); for (int m = 0; m < XS; m++) - store_dword_plug(fd, context, ring, dep[m], 4*n, context->id, plug, I915_GEM_DOMAIN_INSTRUCTION); + store_dword_plug(fd, ahnd, context, ring, + dep[m], dep_offset[m], 4*n, + context->id, plug, plug_offset, + I915_GEM_DOMAIN_INSTRUCTION); } igt_info("First deptree: %d requests [%.3fs]\n", n * XS, 1e-9*igt_nsec_elapsed(&tv)); @@ -1886,8 +2096,10 @@ static void deep(int fd, const intel_ctx_cfg_t *cfg, expected = context->id; for (int m = 0; m < XS; m++) { - store_dword_plug(fd, context, ring, result, 4*n, expected, dep[m], 0); - store_dword(fd, context, ring, result, 4*m, expected, I915_GEM_DOMAIN_INSTRUCTION); + store_dword_plug(fd, ahnd, context, ring, result, result_offset, + 4*n, expected, dep[m], dep_offset[m], 0); + store_dword(fd, ahnd, context, ring, result, result_offset, + 4*m, expected, I915_GEM_DOMAIN_INSTRUCTION); } } igt_info("Second deptree: %d requests [%.3fs]\n", @@ -1912,8 +2124,13 @@ static void deep(int fd, const intel_ctx_cfg_t *cfg, gem_close(fd, result); /* No reordering due to PI on all contexts because of the common dep */ - for (int m = 0; m < XS; m++) + for (int m = 0; m < XS; m++) { + put_offset(ahnd, dep[m]); igt_assert_eq_u32(read_buf[m], expected); + } + put_offset(ahnd, result); + put_offset(ahnd, plug); + put_ahnd(ahnd); free(ctx); #undef XS @@ -1941,12 +2158,14 @@ static void 
wide(int fd, const intel_ctx_cfg_t *cfg, unsigned ring) const intel_ctx_t **ctx; unsigned int count; int fence; + uint64_t ahnd = get_reloc_ahnd(fd, 0), result_offset; ctx = malloc(sizeof(*ctx)*MAX_CONTEXTS); for (int n = 0; n < MAX_CONTEXTS; n++) ctx[n] = intel_ctx_create(fd, cfg); result = gem_create(fd, 4*MAX_CONTEXTS); + result_offset = get_offset(ahnd, result, 4 * MAX_CONTEXTS, 0); fence = igt_cork_plug(&cork, fd); @@ -1955,7 +2174,8 @@ static void wide(int fd, const intel_ctx_cfg_t *cfg, unsigned ring) igt_seconds_elapsed(&tv) < 5 && count < ring_size; count++) { for (int n = 0; n < MAX_CONTEXTS; n++) { - store_dword_fenced(fd, ctx[n], ring, result, 4*n, ctx[n]->id, + store_dword_fenced(fd, ahnd, ctx[n], ring, + result, result_offset, 4*n, ctx[n]->id, fence, I915_GEM_DOMAIN_INSTRUCTION); } } @@ -1974,6 +2194,8 @@ static void wide(int fd, const intel_ctx_cfg_t *cfg, unsigned ring) gem_close(fd, result); free(ctx); + put_offset(ahnd, result); + put_ahnd(ahnd); } static void reorder_wide(int fd, const intel_ctx_cfg_t *cfg, unsigned ring) @@ -1989,8 +2211,11 @@ static void reorder_wide(int fd, const intel_ctx_cfg_t *cfg, unsigned ring) IGT_CORK_FENCE(cork); uint32_t *expected; int fence; + uint64_t ahnd = get_reloc_ahnd(fd, 0), result_offset; + unsigned int sz = ALIGN(ring_size * 64, 4096); result = gem_create(fd, 4096); + result_offset = get_offset(ahnd, result, 4096, 0); target = gem_create(fd, 4096); fence = igt_cork_plug(&cork, fd); @@ -1999,11 +2224,13 @@ static void reorder_wide(int fd, const intel_ctx_cfg_t *cfg, unsigned ring) memset(obj, 0, sizeof(obj)); obj[0].handle = result; + obj[0].offset = result_offset; obj[1].relocs_ptr = to_user_pointer(&reloc); - obj[1].relocation_count = 1; + obj[1].relocation_count = !ahnd ? 
1 : 0; memset(&reloc, 0, sizeof(reloc)); reloc.target_handle = result; + reloc.presumed_offset = obj[0].offset; reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION; reloc.write_domain = 0; /* lies */ @@ -2017,8 +2244,12 @@ static void reorder_wide(int fd, const intel_ctx_cfg_t *cfg, unsigned ring) execbuf.flags |= I915_EXEC_FENCE_IN; execbuf.rsvd2 = fence; + if (ahnd) { + obj[0].flags |= EXEC_OBJECT_PINNED; + obj[1].flags |= EXEC_OBJECT_PINNED; + } + for (int n = 0, x = 1; n < ARRAY_SIZE(priorities); n++, x++) { - unsigned int sz = ALIGN(ring_size * 64, 4096); uint32_t *batch; const intel_ctx_t *tmp_ctx; @@ -2027,6 +2258,9 @@ static void reorder_wide(int fd, const intel_ctx_cfg_t *cfg, unsigned ring) execbuf.rsvd1 = tmp_ctx->id; obj[1].handle = gem_create(fd, sz); + if (ahnd) + obj[1].offset = get_offset(ahnd, obj[1].handle, sz, 0); + batch = gem_mmap__device_coherent(fd, obj[1].handle, 0, sz, PROT_WRITE); gem_set_domain(fd, obj[1].handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); @@ -2064,6 +2298,7 @@ static void reorder_wide(int fd, const intel_ctx_cfg_t *cfg, unsigned ring) munmap(batch, sz); gem_close(fd, obj[1].handle); + put_offset(ahnd, obj[1].handle); intel_ctx_destroy(fd, tmp_ctx); } @@ -2078,6 +2313,8 @@ static void reorder_wide(int fd, const intel_ctx_cfg_t *cfg, unsigned ring) gem_close(fd, result); gem_close(fd, target); + put_offset(ahnd, result); + put_ahnd(ahnd); } static void bind_to_cpu(int cpu) @@ -2268,6 +2505,10 @@ struct ufd_thread { pthread_mutex_t mutex; pthread_cond_t cond; int count; + + uint64_t ahnd; + uint64_t batch_offset; + uint64_t scratch_offset; }; static uint32_t create_userptr(int i915, void *page) @@ -2419,9 +2660,9 @@ static void *iova_thread(struct ufd_thread *t, int prio) ctx = intel_ctx_create(t->i915, t->cfg); gem_context_set_priority(t->i915, ctx->id, prio); - store_dword_plug(t->i915, ctx, t->engine, - t->scratch, 0, prio, - t->batch, 0 /* no write hazard! 
*/); + store_dword_plug(t->i915, t->ahnd, ctx, t->engine, + t->scratch, t->scratch_offset, 0, prio, + t->batch, t->batch_offset, 0 /* no write hazard! */); pthread_mutex_lock(&t->mutex); if (!--t->count) @@ -2455,6 +2696,7 @@ static void test_pi_iova(int i915, const intel_ctx_cfg_t *cfg, pthread_t hi, lo; char poison[4096]; int ufd; + uint64_t ahnd = get_reloc_ahnd(i915, 0); /* * In this scenario, we have a pair of contending contexts that @@ -2485,6 +2727,7 @@ static void test_pi_iova(int i915, const intel_ctx_cfg_t *cfg, t.i915 = i915; t.cfg = &ufd_cfg; t.engine = engine; + t.ahnd = ahnd; t.count = 2; pthread_cond_init(&t.cond, NULL); @@ -2494,6 +2737,8 @@ static void test_pi_iova(int i915, const intel_ctx_cfg_t *cfg, igt_assert(t.page != MAP_FAILED); t.batch = create_userptr(i915, t.page); t.scratch = gem_create(i915, 4096); + t.batch_offset = get_offset(ahnd, t.batch, 4096, 0); + t.scratch_offset = get_offset(ahnd, t.scratch, 4096, 0); /* Register our fault handler for t.page */ memset(®, 0, sizeof(reg)); @@ -2521,7 +2766,7 @@ static void test_pi_iova(int i915, const intel_ctx_cfg_t *cfg, * the local tasklet will not run until after all signals have been * delivered... but another tasklet might). 
*/ - spin = igt_spin_new(i915, .engine = engine); + spin = igt_spin_new(i915, .ahnd = ahnd, .engine = engine); for (int i = 0; i < MAX_ELSP_QLEN; i++) { const intel_ctx_t *ctx = create_highest_priority(i915, cfg); spin->execbuf.rsvd1 = ctx->id; @@ -2554,6 +2799,9 @@ static void test_pi_iova(int i915, const intel_ctx_cfg_t *cfg, pthread_mutex_unlock(&t.mutex); igt_debugfs_dump(i915, "i915_engine_info"); igt_spin_free(i915, spin); + put_offset(ahnd, t.scratch); + put_offset(ahnd, t.batch); + put_ahnd(ahnd); pthread_join(hi, NULL); pthread_join(lo, NULL); @@ -2574,6 +2822,7 @@ static void measure_semaphore_power(int i915, const intel_ctx_t *ctx) { const struct intel_execution_engine2 *signaler, *e; struct rapl gpu, pkg; + uint64_t ahnd = get_simple_l2h_ahnd(i915, ctx->id); igt_require(gpu_power_open(&gpu) == 0); pkg_power_open(&pkg); @@ -2584,12 +2833,14 @@ static void measure_semaphore_power(int i915, const intel_ctx_t *ctx) } s_spin[2], s_sema[2]; double baseline, total; int64_t jiffie = 1; - igt_spin_t *spin; + igt_spin_t *spin, *sema[GEM_MAX_ENGINES] = {}; + int i; if (!gem_class_can_store_dword(i915, signaler->class)) continue; spin = __igt_spin_new(i915, + .ahnd = ahnd, .ctx = ctx, .engine = signaler->flags, .flags = IGT_SPIN_POLL_RUN); @@ -2603,19 +2854,32 @@ static void measure_semaphore_power(int i915, const intel_ctx_t *ctx) rapl_read(&pkg, &s_spin[1].pkg); /* Add a waiter to each engine */ + i = 0; for_each_ctx_engine(i915, ctx, e) { - igt_spin_t *sema; - - if (e->flags == signaler->flags) + if (e->flags == signaler->flags) { + i++; continue; + } - sema = __igt_spin_new(i915, - .ctx = ctx, - .engine = e->flags, - .dependency = spin->handle); - - igt_spin_free(i915, sema); + /* + * We need same spin->handle offset for each sema + * so we need to use SIMPLE allocator. As freeing + * spinner lead to alloc same offset for next batch + * we would serialize spinners. 
To avoid this on + * SIMPLE we just defer freeing spinners when + * all of them will be created and each of them + * will have separate offsets for batchbuffer. + */ + sema[i] = __igt_spin_new(i915, + .ahnd = ahnd, + .ctx = ctx, + .engine = e->flags, + .dependency = spin->handle); + i++; } + for (i = 0; i < GEM_MAX_ENGINES; i++) + if (sema[i]) + igt_spin_free(i915, sema[i]); usleep(10); /* just give the tasklets a chance to run */ rapl_read(&pkg, &s_sema[0].pkg); @@ -2646,6 +2910,7 @@ static void measure_semaphore_power(int i915, const intel_ctx_t *ctx) rapl_close(&gpu); rapl_close(&pkg); + put_ahnd(ahnd); } static int read_timestamp_frequency(int i915) @@ -2703,9 +2968,16 @@ static uint32_t read_ctx_timestamp(int i915, const intel_ctx_t *ctx, #define RUNTIME (base + 0x3a8) uint32_t *map, *cs; uint32_t ts; + uint64_t ahnd = get_reloc_ahnd(i915, ctx->id); igt_require(base); + if (ahnd) { + obj.offset = get_offset(ahnd, obj.handle, 4096, 0); + obj.flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS; + obj.relocation_count = 0; + } + cs = map = gem_mmap__device_coherent(i915, obj.handle, 0, 4096, PROT_WRITE); @@ -2741,11 +3013,14 @@ static void fairslice(int i915, const intel_ctx_cfg_t *cfg, double threshold; const intel_ctx_t *ctx[3]; uint32_t ts[3]; + uint64_t ahnd; for (int i = 0; i < ARRAY_SIZE(ctx); i++) { ctx[i] = intel_ctx_create(i915, cfg); if (spin == NULL) { + ahnd = get_reloc_ahnd(i915, ctx[i]->id); spin = __igt_spin_new(i915, + .ahnd = ahnd, .ctx = ctx[i], .engine = e->flags, .flags = flags); @@ -2770,6 +3045,7 @@ static void fairslice(int i915, const intel_ctx_cfg_t *cfg, for (int i = 0; i < ARRAY_SIZE(ctx); i++) intel_ctx_destroy(i915, ctx[i]); igt_spin_free(i915, spin); + put_ahnd(ahnd); /* * If we imagine that the timeslices are randomly distributed to @@ -2879,6 +3155,9 @@ igt_main test_each_engine("u-fairslice", fd, ctx, e) fairslice(fd, &ctx->cfg, e, IGT_SPIN_USERPTR, 2); + igt_fixture { + intel_allocator_multiprocess_start(); + } 
igt_subtest("fairslice-all") { for_each_ctx_engine(fd, ctx, e) { igt_fork(child, 1) @@ -2895,6 +3174,9 @@ igt_main } igt_waitchildren(); } + igt_fixture { + intel_allocator_multiprocess_stop(); + } } test_each_engine("submit-early-slice", fd, ctx, e) |