From 49e09e73460b74a56b210b804f32abbcf5eab66a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Mar 2017 11:26:39 +0100 Subject: benchmarks/gem_exec_trace: Enhanced multi-context capture Signed-off-by: Chris Wilson --- benchmarks/gem_exec_trace.c | 297 ++++++++++++++++++++++++-------------------- 1 file changed, 160 insertions(+), 137 deletions(-) (limited to 'benchmarks/gem_exec_trace.c') diff --git a/benchmarks/gem_exec_trace.c b/benchmarks/gem_exec_trace.c index b487d76b..ddd140f4 100644 --- a/benchmarks/gem_exec_trace.c +++ b/benchmarks/gem_exec_trace.c @@ -47,6 +47,8 @@ enum { ADD_BO = 0, DEL_BO, + ADD_CTX, + DEL_CTX, EXEC, }; @@ -59,44 +61,53 @@ struct trace_del_bo { uint32_t handle; } __attribute__((packed)); +struct trace_add_ctx { + uint32_t handle; +} __attribute__((packed)); + +struct trace_del_ctx { + uint32_t handle; +} __attribute__((packed)); + struct trace_exec { uint32_t object_count; uint64_t flags; -} __attribute__((packed)); + uint32_t context; +}__attribute__((packed)); + struct trace_exec_object { uint32_t handle; uint32_t relocation_count; uint64_t alignment; + uint64_t offset; uint64_t flags; uint64_t rsvd1; uint64_t rsvd2; -} __attribute__((packed)); -struct trace_exec_relocation { - uint32_t target_handle; - uint32_t delta; - uint64_t offset; - uint32_t read_domains; - uint32_t write_domain; -} __attribute__((packed)); +}__attribute__((packed)); static double elapsed(const struct timespec *start, const struct timespec *end) { return 1e3*(end->tv_sec - start->tv_sec) + 1e-6*(end->tv_nsec - start->tv_nsec); } -static void replay(const char *filename) +static uint32_t __gem_context_create(int fd) +{ + struct drm_i915_gem_context_create arg = {}; + drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &arg); + return arg.ctx_id; +} + +static double replay(const char *filename) { struct timespec t_start, t_end; struct drm_i915_gem_execbuffer2 eb = {}; - struct bo { - uint32_t handle; - uint64_t offset; - - struct drm_i915_gem_relocation_entry *relocs; - uint32_t max_relocs; - } *bo = NULL, **offsets = NULL; - int num_bo = 0; + const struct trace_version { + uint32_t magic; + uint32_t version; + } *tv; struct drm_i915_gem_exec_object2 *exec_objects = NULL; + uint32_t *bo, *ctx; + int num_bo, num_ctx; int max_objects = 0; struct stat st; uint8_t *ptr, *end; @@ -104,149 +115,161 @@ static void replay(const char *filename) fd = open(filename, O_RDONLY); if (fd < 0) - return; + return -1; if (fstat(fd, &st) < 0) - return; + return -1; + + ctx = calloc(1024, sizeof(*ctx)); + num_ctx = 1024; + + bo = calloc(4096, sizeof(*bo)); + num_bo = 4096; ptr = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0); close(fd); if (ptr == MAP_FAILED) - return; + return -1; madvise(ptr, st.st_size, MADV_SEQUENTIAL); end = ptr + st.st_size; fd = drm_open_driver(DRIVER_INTEL); + tv = (struct trace_version *)ptr; + if (tv->magic != 0xdeadbeef) { + fprintf(stderr, "%s: invalid magic\n", filename); + return -1; + } + if (tv->version != 1) { + fprintf(stderr, "%s: unhandled version %d\n", + filename, tv->version); + return -1; + } + ptr = (void *)(tv + 1); + clock_gettime(CLOCK_MONOTONIC, &t_start); - do { - switch (*ptr++) { - case ADD_BO: { - uint32_t bb = 0xa << 23; - struct trace_add_bo *t = (void *)ptr; - ptr = (void *)(t + 1); - - if (t->handle >= num_bo) { - int new_bo = (t->handle + 4096) & -4096; - bo = realloc(bo, sizeof(*bo)*new_bo); - memset(bo + num_bo, 0, sizeof(*bo)*(new_bo - num_bo)); - num_bo = new_bo; - } - - bo[t->handle].handle = gem_create(fd, t->size); - gem_write(fd, bo[t->handle].handle, 0, &bb, sizeof(bb)); - break; - } - case DEL_BO: { - struct trace_del_bo *t = (void *)ptr; - ptr = (void *)(t + 1); - - gem_close(fd, bo[t->handle].handle); - bo[t->handle].handle = 0; - - free(bo[t->handle].relocs); - bo[t->handle].relocs = NULL; - bo[t->handle].max_relocs = 0; - break; - } - case EXEC: { - struct trace_exec *t = (void *)ptr; - uint32_t i, j; - ptr = (void *)(t + 1); - - eb.buffer_count = t->object_count; - eb.flags = t->flags & ~I915_EXEC_RING_MASK; - - if (eb.buffer_count > max_objects) { - free(exec_objects); - free(offsets); - - max_objects = ALIGN(eb.buffer_count, 4096); - - exec_objects = malloc(max_objects*sizeof(*exec_objects)); - offsets = malloc(max_objects*sizeof(*offsets)); - - eb.buffers_ptr = (uintptr_t)exec_objects; - } - - for (i = 0; i < eb.buffer_count; i++) { - struct drm_i915_gem_relocation_entry *relocs; - struct trace_exec_object *to = (void *)ptr; - ptr = (void *)(to + 1); - - offsets[i] = &bo[to->handle]; - - exec_objects[i].handle = bo[to->handle].handle; - exec_objects[i].offset = bo[to->handle].offset; - exec_objects[i].alignment = to->alignment; - exec_objects[i].flags = to->flags; - exec_objects[i].rsvd1 = to->rsvd1; - exec_objects[i].rsvd2 = to->rsvd2; - - exec_objects[i].relocation_count = to->relocation_count; - if (!to->relocation_count) - continue; - - if (to->relocation_count > bo[to->handle].max_relocs) { - free(bo[to->handle].relocs); - - bo[to->handle].max_relocs = ALIGN(to->relocation_count, 128); - bo[to->handle].relocs = malloc(sizeof(*bo[to->handle].relocs)*bo[to->handle].max_relocs); - } - relocs = bo[to->handle].relocs; - exec_objects[i].relocs_ptr = (uintptr_t)relocs; - - for (j = 0; j < to->relocation_count; j++) { - struct trace_exec_relocation *tr = (void *)ptr; - ptr = (void *)(tr + 1); - - if (eb.flags & I915_EXEC_HANDLE_LUT) { - uint32_t handle; - - relocs[j].target_handle = tr->target_handle; - - handle = exec_objects[tr->target_handle].handle; - relocs[j].presumed_offset = bo[handle].offset; - } else { - relocs[j].target_handle = bo[tr->target_handle].handle; - relocs[j].presumed_offset = bo[tr->target_handle].offset; - } - relocs[j].delta = tr->delta; - relocs[j].offset = tr->offset; - relocs[j].read_domains = tr->read_domains; - relocs[j].write_domain = tr->write_domain; - } - } - - gem_execbuf(fd, &eb); - - for (i = 0; i < eb.buffer_count; i++) - offsets[i]->offset = exec_objects[i].offset; - - break; - } + do switch (*ptr++) { + case ADD_BO: + { + const uint32_t bbe = 0xa << 23; + struct trace_add_bo *t = (void *)ptr; + ptr = (void *)(t + 1); + + if (t->handle >= num_bo) { + int new_bo = ALIGN(t->handle, 4096); + bo = realloc(bo, sizeof(*bo)*new_bo); + memset(bo + num_bo, 0, sizeof(*bo)*(new_bo - num_bo)); + num_bo = new_bo; + } + + bo[t->handle] = gem_create(fd, t->size); + gem_write(fd, bo[t->handle], t->size - sizeof(bbe), + &bbe, sizeof(bbe)); + break; + } + case DEL_BO: + { + struct trace_del_bo *t = (void *)ptr; + ptr = (void *)(t + 1); + + gem_close(fd, bo[t->handle]); + bo[t->handle] = 0; + break; + } + case ADD_CTX: + { + struct trace_add_ctx *t = (void *)ptr; + ptr = (void *)(t + 1); + + if (t->handle >= num_ctx) { + int new_ctx = ALIGN(t->handle, 1024); + ctx = realloc(ctx, sizeof(*ctx)*new_ctx); + memset(ctx + num_ctx, 0, sizeof(*ctx)*(new_ctx - num_ctx)); + num_ctx = new_ctx; + } + + ctx[t->handle] = __gem_context_create(fd); + break; + } + case DEL_CTX: + { + struct trace_del_ctx *t = (void *)ptr; + ptr = (void *)(t + 1); + + if (t->handle < num_ctx && ctx[t->handle]) { + gem_context_destroy(fd, ctx[t->handle]); + ctx[t->handle] = 0; + } + break; } + case EXEC: + { + struct trace_exec *t = (void *)ptr; + ptr = (void *)(t + 1); + + eb.buffer_count = t->object_count; + eb.flags = t->flags; + eb.rsvd1 = ctx[t->context]; + + if (eb.buffer_count > max_objects) { + free(exec_objects); + + max_objects = ALIGN(eb.buffer_count, 4096); + + exec_objects = malloc(max_objects*sizeof(*exec_objects)); + eb.buffers_ptr = (uintptr_t)exec_objects; + } + + for (uint32_t i = 0; i < eb.buffer_count; i++) { + struct trace_exec_object *to = (void *)ptr; + ptr = (void *)(to + 1); + + exec_objects[i].handle = bo[to->handle]; + exec_objects[i].alignment = to->alignment; + exec_objects[i].offset = to->offset; + exec_objects[i].flags = to->flags; + exec_objects[i].rsvd1 = to->rsvd1; + exec_objects[i].rsvd2 = to->rsvd2; + + exec_objects[i].relocation_count = to->relocation_count; + exec_objects[i].relocs_ptr = (uintptr_t)ptr; + ptr += sizeof(struct drm_i915_gem_relocation_entry) * to->relocation_count; + } + + gem_execbuf(fd, &eb); + break; + } + + default: + fprintf(stderr, "Unknown cmd: %x\n", *ptr); + return -1; } while (ptr < end); clock_gettime(CLOCK_MONOTONIC, &t_end); - close(fd); - munmap(end-st.st_size, st.st_size); - for (fd = 0; fd < num_bo; fd++) - free(bo[fd].relocs); - free(bo); - free(offsets); - - printf("%s: %.3f\n", filename, elapsed(&t_start, &t_end)); + return elapsed(&t_start, &t_end); } int main(int argc, char **argv) { + double *results; int i; - for (i = 1; i < argc; i++) - replay(argv[i]); + results = mmap(NULL, ALIGN(argc*sizeof(double), 4096), + PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); + + igt_fork(child, argc-1) + results[child] = replay(argv[child + 1]); + igt_waitchildren(); + + for (i = 0; i < argc - 1; i++) { + double t = results[i]; + if (t < 0) + printf("%s: failed\n", argv[i+1]); + else + printf("%s: %.3f\n", argv[i+1], t); + } return 0; } -- cgit v1.2.3