diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2016-07-03 09:42:38 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2016-07-03 11:23:05 +0100 |
commit | 376b813e7c60826357034f4ea40ffada514fbdc3 (patch) | |
tree | 12d829ab778cd16e0b41eea88d8153163e624485 /tests | |
parent | 3e765840129a17b462565f7b48dfe9d9792b292f (diff) |
igt/gem_exec_gttfill: Reduce overhead in setting up filler batches
Since all the batches start with the same content, we can build that content
once and reuse the same buffer to fill each of them.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/gem_exec_gttfill.c | 92 | ||||
-rw-r--r-- | tests/kms_cursor_legacy.c | 15 |
2 files changed, 60 insertions, 47 deletions
diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c index 3ec43dcf..943f58da 100644 --- a/tests/gem_exec_gttfill.c +++ b/tests/gem_exec_gttfill.c @@ -22,6 +22,7 @@ */ #include "igt.h" +#include "igt_rand.h" IGT_TEST_DESCRIPTION("Fill the GTT with batches."); @@ -60,52 +61,59 @@ static void submit(int fd, int gen, uint32_t *handles, unsigned count) { struct drm_i915_gem_exec_object2 obj; + uint32_t batch[16]; + unsigned n; + + memset(&obj, 0, sizeof(obj)); + obj.relocs_ptr = (uintptr_t)reloc; + obj.relocation_count = 2; + + memset(reloc, 0, 2*sizeof(*reloc)); + reloc[0].offset = eb->batch_start_offset; + reloc[0].offset += sizeof(uint32_t); + reloc[0].delta = BATCH_SIZE - eb->batch_start_offset - 8; + reloc[0].read_domains = I915_GEM_DOMAIN_INSTRUCTION; + reloc[1].offset = eb->batch_start_offset; + reloc[1].offset += 3*sizeof(uint32_t); + reloc[1].read_domains = I915_GEM_DOMAIN_INSTRUCTION; + + n = 0; + batch[n] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); + if (gen >= 8) { + batch[n] |= 1 << 21; + batch[n]++; + batch[++n] = reloc[0].delta;/* lower_32_bits(address) */ + batch[++n] = 0; /* upper_32_bits(address) */ + } else if (gen >= 4) { + batch[++n] = 0; + batch[++n] = reloc[0].delta;/* lower_32_bits(address) */ + reloc[0].offset += sizeof(uint32_t); + } else { + batch[n]--; + batch[++n] = reloc[0].delta;/* lower_32_bits(address) */ + reloc[1].offset -= sizeof(uint32_t); + } + batch[++n] = 0; /* lower_32_bits(value) */ + batch[++n] = 0; /* upper_32_bits(value) / nop */ + batch[++n] = MI_BATCH_BUFFER_END; eb->buffers_ptr = (uintptr_t)&obj; for (unsigned i = 0; i < count; i++) { - uint32_t batch[16]; - unsigned n; - - memset(&obj, 0, sizeof(obj)); obj.handle = handles[i]; - obj.relocs_ptr = (uintptr_t)reloc; - obj.relocation_count = 2; - - memset(reloc, 0, 2*sizeof(*reloc)); reloc[0].target_handle = obj.handle; - reloc[0].offset = eb->batch_start_offset; - reloc[0].offset += sizeof(uint32_t); - reloc[0].delta = BATCH_SIZE - eb->batch_start_offset - 
8; - reloc[0].read_domains = I915_GEM_DOMAIN_INSTRUCTION; reloc[1].target_handle = obj.handle; - reloc[1].offset = eb->batch_start_offset; - reloc[1].offset += 3*sizeof(uint32_t); - reloc[1].read_domains = I915_GEM_DOMAIN_INSTRUCTION; - - n = 0; - batch[n] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); - if (gen >= 8) { - batch[n] |= 1 << 21; - batch[n]++; - batch[++n] = reloc[0].delta;/* lower_32_bits(address) */ - batch[++n] = 0; /* upper_32_bits(address) */ - } else if (gen >= 4) { - batch[++n] = 0; - batch[++n] = reloc[0].delta;/* lower_32_bits(address) */ - reloc[0].offset += sizeof(uint32_t); - } else { - batch[n]--; - batch[++n] = reloc[0].delta;/* lower_32_bits(address) */ - reloc[1].offset -= sizeof(uint32_t); - } - batch[++n] = 0; /* lower_32_bits(value) */ - batch[++n] = 0; /* upper_32_bits(value) / nop */ - batch[++n] = MI_BATCH_BUFFER_END; + + obj.offset = 0; + reloc[0].presumed_offset = obj.offset; + reloc[1].presumed_offset = obj.offset; + gem_write(fd, obj.handle, eb->batch_start_offset, batch, sizeof(batch)); gem_execbuf(fd, eb); } + /* As we have been lying about the write_domain, we need to do a sync */ + gem_sync(fd, obj.handle); } static void fillgtt(int fd, unsigned ring, int timeout) @@ -113,6 +121,7 @@ static void fillgtt(int fd, unsigned ring, int timeout) const int gen = intel_gen(intel_get_drm_devid(fd)); struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_relocation_entry reloc[2]; + volatile uint64_t *shared; unsigned *handles; unsigned engines[16]; unsigned nengine; @@ -120,6 +129,9 @@ static void fillgtt(int fd, unsigned ring, int timeout) uint64_t size; unsigned count; + shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); + igt_assert(shared != MAP_FAILED); + nengine = 0; if (ring == 0) { for_each_engine(fd, engine) { @@ -164,6 +176,8 @@ static void fillgtt(int fd, unsigned ring, int timeout) submit(fd, gen, &execbuf, reloc, handles, count); igt_fork(child, nengine) { + uint64_t cycles = 0; + 
hars_petruska_f54_1_random_perturb(child); igt_permute_array(handles, count, xchg_u32); execbuf.batch_start_offset = child*64; execbuf.flags |= engines[child]; @@ -177,13 +191,21 @@ static void fillgtt(int fd, unsigned ring, int timeout) gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1])); igt_assert_eq_u64(buf[0], buf[1]); } + cycles++; } + shared[child] = cycles; + igt_info("engine[%d]: %llu cycles\n", child, (long long)cycles); } igt_waitchildren(); for (unsigned i = 0; i < count; i++) gem_close(fd, handles[i]); + shared[nengine] = 0; + for (unsigned i = 0; i < nengine; i++) + shared[nengine] += shared[i]; + igt_info("Total: %llu cycles\n", (long long)shared[nengine]); + igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0); } diff --git a/tests/kms_cursor_legacy.c b/tests/kms_cursor_legacy.c index 192a0cf3..7dca383e 100644 --- a/tests/kms_cursor_legacy.c +++ b/tests/kms_cursor_legacy.c @@ -26,6 +26,7 @@ #include <sched.h> #include "igt.h" +#include "igt_rand.h" #include "igt_stats.h" #if defined(__x86_64__) || defined(__i386__) @@ -41,16 +42,6 @@ struct data { drmModeRes *resources; }; -static uint32_t state = 0x12345678; - -static uint32_t -hars_petruska_f54_1_random (void) -{ -#define rol(x,k) ((x << k) | (x >> (32-k))) - return state = (state ^ rol (state, 5) ^ rol (state, 24)) + 0x37798849; -#undef rol -} - static void stress(struct data *data, uint32_t *crtc_id, unsigned num_crtcs, int num_children, unsigned mode, @@ -94,9 +85,9 @@ static void stress(struct data *data, CPU_SET(child, &allowed); sched_setaffinity(getpid(), sizeof(cpu_set_t), &allowed); - state ^= child; + hars_petruska_f54_1_random_perturb(child); igt_until_timeout(timeout) { - arg.crtc_id = crtc_id[hars_petruska_f54_1_random() % num_crtcs]; + arg.crtc_id = crtc_id[hars_petruska_f54_1_random_unsafe() % num_crtcs]; do_ioctl(data->fd, DRM_IOCTL_MODE_CURSOR, &arg); count++; } |