From 49e09e73460b74a56b210b804f32abbcf5eab66a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Mar 2017 11:26:39 +0100 Subject: benchmarks/gem_exec_trace: Enhanced multi-context capture Signed-off-by: Chris Wilson --- benchmarks/gem_exec_trace.c | 297 +++++++++++++++++++++++-------------------- benchmarks/gem_exec_tracer.c | 200 +++++++++++++++++++++-------- 2 files changed, 304 insertions(+), 193 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/gem_exec_trace.c b/benchmarks/gem_exec_trace.c index b487d76b..ddd140f4 100644 --- a/benchmarks/gem_exec_trace.c +++ b/benchmarks/gem_exec_trace.c @@ -47,6 +47,8 @@ enum { ADD_BO = 0, DEL_BO, + ADD_CTX, + DEL_CTX, EXEC, }; @@ -59,44 +61,53 @@ struct trace_del_bo { uint32_t handle; } __attribute__((packed)); +struct trace_add_ctx { + uint32_t handle; +} __attribute__((packed)); + +struct trace_del_ctx { + uint32_t handle; +} __attribute__((packed)); + struct trace_exec { uint32_t object_count; uint64_t flags; -} __attribute__((packed)); + uint32_t context; +}__attribute__((packed)); + struct trace_exec_object { uint32_t handle; uint32_t relocation_count; uint64_t alignment; + uint64_t offset; uint64_t flags; uint64_t rsvd1; uint64_t rsvd2; -} __attribute__((packed)); -struct trace_exec_relocation { - uint32_t target_handle; - uint32_t delta; - uint64_t offset; - uint32_t read_domains; - uint32_t write_domain; -} __attribute__((packed)); +}__attribute__((packed)); static double elapsed(const struct timespec *start, const struct timespec *end) { return 1e3*(end->tv_sec - start->tv_sec) + 1e-6*(end->tv_nsec - start->tv_nsec); } -static void replay(const char *filename) +static uint32_t __gem_context_create(int fd) +{ + struct drm_i915_gem_context_create arg = {}; + drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &arg); + return arg.ctx_id; +} + +static double replay(const char *filename) { struct timespec t_start, t_end; struct drm_i915_gem_execbuffer2 eb = {}; - struct bo { - uint32_t handle; - uint64_t offset; - - struct drm_i915_gem_relocation_entry *relocs; - uint32_t max_relocs; - } *bo = NULL, **offsets = NULL; - int num_bo = 0; + const struct trace_version { + uint32_t magic; + uint32_t version; + } *tv; struct drm_i915_gem_exec_object2 *exec_objects = NULL; + uint32_t *bo, *ctx; + int num_bo, num_ctx; int max_objects = 0; struct stat st; uint8_t *ptr, *end; @@ -104,149 +115,161 @@ static void replay(const char *filename) fd = open(filename, O_RDONLY); if (fd < 0) - return; + return -1; if (fstat(fd, &st) < 0) - return; + return -1; + + ctx = calloc(1024, sizeof(*ctx)); + num_ctx = 1024; + + bo = calloc(4096, sizeof(*bo)); + num_bo = 4096; ptr = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0); close(fd); if (ptr == MAP_FAILED) - return; + return -1; madvise(ptr, st.st_size, MADV_SEQUENTIAL); end = ptr + st.st_size; fd = drm_open_driver(DRIVER_INTEL); + tv = (struct trace_version *)ptr; + if (tv->magic != 0xdeadbeef) { + fprintf(stderr, "%s: invalid magic\n", filename); + return -1; + } + if (tv->version != 1) { + fprintf(stderr, "%s: unhandled version %d\n", + filename, tv->version); + return -1; + } + ptr = (void *)(tv + 1); + clock_gettime(CLOCK_MONOTONIC, &t_start); - do { - switch (*ptr++) { - case ADD_BO: { - uint32_t bb = 0xa << 23; - struct trace_add_bo *t = (void *)ptr; - ptr = (void *)(t + 1); - - if (t->handle >= num_bo) { - int new_bo = (t->handle + 4096) & -4096; - bo = realloc(bo, sizeof(*bo)*new_bo); - memset(bo + num_bo, 0, sizeof(*bo)*(new_bo - num_bo)); - num_bo = new_bo; - } - - bo[t->handle].handle = gem_create(fd, t->size); - gem_write(fd, bo[t->handle].handle, 0, &bb, sizeof(bb)); - break; - } - case DEL_BO: { - struct trace_del_bo *t = (void *)ptr; - ptr = (void *)(t + 1); - - gem_close(fd, bo[t->handle].handle); - bo[t->handle].handle = 0; - - free(bo[t->handle].relocs); - bo[t->handle].relocs = NULL; - bo[t->handle].max_relocs = 0; - break; - } - case EXEC: { - struct trace_exec *t = (void *)ptr; - uint32_t i, j; - ptr = (void *)(t + 1); - - eb.buffer_count = t->object_count; - eb.flags = t->flags & ~I915_EXEC_RING_MASK; - - if (eb.buffer_count > max_objects) { - free(exec_objects); - free(offsets); - - max_objects = ALIGN(eb.buffer_count, 4096); - - exec_objects = malloc(max_objects*sizeof(*exec_objects)); - offsets = malloc(max_objects*sizeof(*offsets)); - - eb.buffers_ptr = (uintptr_t)exec_objects; - } - - for (i = 0; i < eb.buffer_count; i++) { - struct drm_i915_gem_relocation_entry *relocs; - struct trace_exec_object *to = (void *)ptr; - ptr = (void *)(to + 1); - - offsets[i] = &bo[to->handle]; - - exec_objects[i].handle = bo[to->handle].handle; - exec_objects[i].offset = bo[to->handle].offset; - exec_objects[i].alignment = to->alignment; - exec_objects[i].flags = to->flags; - exec_objects[i].rsvd1 = to->rsvd1; - exec_objects[i].rsvd2 = to->rsvd2; - - exec_objects[i].relocation_count = to->relocation_count; - if (!to->relocation_count) - continue; - - if (to->relocation_count > bo[to->handle].max_relocs) { - free(bo[to->handle].relocs); - - bo[to->handle].max_relocs = ALIGN(to->relocation_count, 128); - bo[to->handle].relocs = malloc(sizeof(*bo[to->handle].relocs)*bo[to->handle].max_relocs); - } - relocs = bo[to->handle].relocs; - exec_objects[i].relocs_ptr = (uintptr_t)relocs; - - for (j = 0; j < to->relocation_count; j++) { - struct trace_exec_relocation *tr = (void *)ptr; - ptr = (void *)(tr + 1); - - if (eb.flags & I915_EXEC_HANDLE_LUT) { - uint32_t handle; - - relocs[j].target_handle = tr->target_handle; - - handle = exec_objects[tr->target_handle].handle; - relocs[j].presumed_offset = bo[handle].offset; - } else { - relocs[j].target_handle = bo[tr->target_handle].handle; - relocs[j].presumed_offset = bo[tr->target_handle].offset; - } - relocs[j].delta = tr->delta; - relocs[j].offset = tr->offset; - relocs[j].read_domains = tr->read_domains; - relocs[j].write_domain = tr->write_domain; - } - } - - gem_execbuf(fd, &eb); - - for (i = 0; i < eb.buffer_count; i++) - offsets[i]->offset = exec_objects[i].offset; - - break; - } + do switch (*ptr++) { + case ADD_BO: + { + const uint32_t bbe = 0xa << 23; + struct trace_add_bo *t = (void *)ptr; + ptr = (void *)(t + 1); + + if (t->handle >= num_bo) { + int new_bo = ALIGN(t->handle, 4096); + bo = realloc(bo, sizeof(*bo)*new_bo); + memset(bo + num_bo, 0, sizeof(*bo)*(new_bo - num_bo)); + num_bo = new_bo; + } + + bo[t->handle] = gem_create(fd, t->size); + gem_write(fd, bo[t->handle], t->size - sizeof(bbe), + &bbe, sizeof(bbe)); + break; + } + case DEL_BO: + { + struct trace_del_bo *t = (void *)ptr; + ptr = (void *)(t + 1); + + gem_close(fd, bo[t->handle]); + bo[t->handle] = 0; + break; + } + case ADD_CTX: + { + struct trace_add_ctx *t = (void *)ptr; + ptr = (void *)(t + 1); + + if (t->handle >= num_ctx) { + int new_ctx = ALIGN(t->handle, 1024); + ctx = realloc(ctx, sizeof(*ctx)*new_ctx); + memset(ctx + num_ctx, 0, sizeof(*ctx)*(new_ctx - num_ctx)); + num_ctx = new_ctx; + } + + ctx[t->handle] = __gem_context_create(fd); + break; + } + case DEL_CTX: + { + struct trace_del_ctx *t = (void *)ptr; + ptr = (void *)(t + 1); + + if (t->handle < num_ctx && ctx[t->handle]) { + gem_context_destroy(fd, ctx[t->handle]); + ctx[t->handle] = 0; + } + break; } + case EXEC: + { + struct trace_exec *t = (void *)ptr; + ptr = (void *)(t + 1); + + eb.buffer_count = t->object_count; + eb.flags = t->flags; + eb.rsvd1 = ctx[t->context]; + + if (eb.buffer_count > max_objects) { + free(exec_objects); + + max_objects = ALIGN(eb.buffer_count, 4096); + + exec_objects = malloc(max_objects*sizeof(*exec_objects)); + eb.buffers_ptr = (uintptr_t)exec_objects; + } + + for (uint32_t i = 0; i < eb.buffer_count; i++) { + struct trace_exec_object *to = (void *)ptr; + ptr = (void *)(to + 1); + + exec_objects[i].handle = bo[to->handle]; + exec_objects[i].alignment = to->alignment; + exec_objects[i].offset = to->offset; + exec_objects[i].flags = to->flags; + exec_objects[i].rsvd1 = to->rsvd1; + exec_objects[i].rsvd2 = to->rsvd2; + + exec_objects[i].relocation_count = to->relocation_count; + exec_objects[i].relocs_ptr = (uintptr_t)ptr; + ptr += sizeof(struct drm_i915_gem_relocation_entry) * to->relocation_count; + } + + gem_execbuf(fd, &eb); + break; + } + + default: + fprintf(stderr, "Unknown cmd: %x\n", *ptr); + return -1; } while (ptr < end); clock_gettime(CLOCK_MONOTONIC, &t_end); - close(fd); - munmap(end-st.st_size, st.st_size); - for (fd = 0; fd < num_bo; fd++) - free(bo[fd].relocs); - free(bo); - free(offsets); - - printf("%s: %.3f\n", filename, elapsed(&t_start, &t_end)); + return elapsed(&t_start, &t_end); } int main(int argc, char **argv) { + double *results; int i; - for (i = 1; i < argc; i++) - replay(argv[i]); + results = mmap(NULL, ALIGN(argc*sizeof(double), 4096), + PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); + + igt_fork(child, argc-1) + results[child] = replay(argv[child + 1]); + igt_waitchildren(); + + for (i = 0; i < argc - 1; i++) { + double t = results[i]; + if (t < 0) + printf("%s: failed\n", argv[i+1]); + else + printf("%s: %.3f\n", argv[i+1], t); + } return 0; } diff --git a/benchmarks/gem_exec_tracer.c b/benchmarks/gem_exec_tracer.c index f0057942..9af61420 100644 --- a/benchmarks/gem_exec_tracer.c +++ b/benchmarks/gem_exec_tracer.c @@ -45,17 +45,30 @@ static int (*libc_close)(int fd); static int (*libc_ioctl)(int fd, unsigned long request, void *argp); -static int drm_fd = -1; -static FILE *file; +struct trace { + int fd; + FILE *file; + struct trace *next; +} *traces; #define DRM_MAJOR 226 enum { ADD_BO = 0, DEL_BO, + ADD_CTX, + DEL_CTX, EXEC, }; +static struct trace_verion { + uint32_t magic; + uint32_t version; +} version = { + .magic = 0xdeadbeef, + .version = 1 +}; + struct trace_add_bo { uint8_t cmd; uint32_t handle; @@ -66,16 +79,27 @@ struct trace_del_bo { uint32_t handle; }__attribute__((packed)); +struct trace_add_ctx { + uint8_t cmd; + uint32_t handle; +} __attribute__((packed)); +struct trace_del_ctx { + uint8_t cmd; + uint32_t handle; +}__attribute__((packed)); + struct trace_exec { uint8_t cmd; uint32_t object_count; uint64_t flags; + uint32_t context; }__attribute__((packed)); struct trace_exec_object { uint32_t handle; uint32_t relocation_count; uint64_t alignment; + uint64_t offset; uint64_t flags; uint64_t rsvd1; uint64_t rsvd2; @@ -85,6 +109,7 @@ struct trace_exec_relocation { uint32_t target_handle; uint32_t delta; uint64_t offset; + uint64_t presumed_offset; uint32_t read_domains; uint32_t write_domain; }__attribute__((packed)); @@ -101,71 +126,95 @@ fail_if(int cond, const char *format, ...) vfprintf(stderr, format, args); va_end(args); - exit(1); + abort(); } static void -trace_exec(int fd, const struct drm_i915_gem_execbuffer2 *execbuffer2) +trace_exec(struct trace *trace, + const struct drm_i915_gem_execbuffer2 *execbuffer2) { +#define to_ptr(T, x) ((T *)(uintptr_t)(x)) const struct drm_i915_gem_exec_object2 *exec_objects = - (struct drm_i915_gem_exec_object2 *)(uintptr_t)execbuffer2->buffers_ptr; + to_ptr(typeof(*exec_objects), execbuffer2->buffers_ptr); + + fail_if(execbuffer2->flags & (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_OUT), + "fences not supported yet\n"); { struct trace_exec t = { - EXEC, execbuffer2->buffer_count, execbuffer2->flags + EXEC, + execbuffer2->buffer_count, + execbuffer2->flags, + execbuffer2->rsvd1, }; - fwrite(&t, sizeof(t), 1, file); + fwrite(&t, sizeof(t), 1, trace->file); } for (uint32_t i = 0; i < execbuffer2->buffer_count; i++) { const struct drm_i915_gem_exec_object2 *obj = &exec_objects[i]; const struct drm_i915_gem_relocation_entry *relocs = - (struct drm_i915_gem_relocation_entry *)(uintptr_t)obj->relocs_ptr; + to_ptr(typeof(*relocs), obj->relocs_ptr); { struct trace_exec_object t = { obj->handle, obj->relocation_count, obj->alignment, + obj->offset, obj->flags, obj->rsvd1, obj->rsvd2 }; - fwrite(&t, sizeof(t), 1, file); - } - for (uint32_t j = 0; j < obj->relocation_count; j++) { - struct trace_exec_relocation t = { - relocs[j].target_handle, - relocs[j].delta, - relocs[j].offset, - relocs[j].read_domains, - relocs[j].write_domain, - }; - fwrite(&t, sizeof(t), 1, file); + fwrite(&t, sizeof(t), 1, trace->file); } + fwrite(relocs, sizeof(*relocs), obj->relocation_count, + trace->file); } - fflush(file); + fflush(trace->file); +#undef to_ptr } static void -trace_add(uint32_t handle, uint64_t size) +trace_add(struct trace *trace, uint32_t handle, uint64_t size) { struct trace_add_bo t = { ADD_BO, handle, size }; - fwrite(&t, sizeof(t), 1, file); + fwrite(&t, sizeof(t), 1, trace->file); } static void -trace_del(uint32_t handle) +trace_del(struct trace *trace, uint32_t handle) { struct trace_del_bo t = { DEL_BO, handle }; - fwrite(&t, sizeof(t), 1, file); + fwrite(&t, sizeof(t), 1, trace->file); +} + +static void +trace_add_context(struct trace *trace, uint32_t handle) +{ + struct trace_add_ctx t = { ADD_CTX, handle }; + fwrite(&t, sizeof(t), 1, trace->file); +} + +static void +trace_del_context(struct trace *trace, uint32_t handle) +{ + struct trace_del_ctx t = { DEL_CTX, handle }; + fwrite(&t, sizeof(t), 1, trace->file); } int close(int fd) { - if (fd == drm_fd) - drm_fd = -1; + struct trace *t, **p; + + for (p = &traces; (t = *p); p = &t->next) { + if (t->fd == fd) { + *p = t->next; + fclose(t->file); + free(t); + break; + } + } return libc_close(fd); } @@ -186,14 +235,14 @@ size_for_fb(const struct drm_mode_fb_cmd *cmd) static int is_i915(int fd) { - drm_version_t version; + drm_version_t v; char name[5] = ""; - memset(&version, 0, sizeof(version)); - version.name_len = 4; - version.name = name; + memset(&v, 0, sizeof(v)); + v.name_len = 4; + v.name = name; - if (libc_ioctl(fd, DRM_IOCTL_VERSION, &version)) + if (libc_ioctl(fd, DRM_IOCTL_VERSION, &v)) return 0; return strcmp(name, "i915") == 0; @@ -202,6 +251,7 @@ static int is_i915(int fd) int ioctl(int fd, unsigned long request, ...) { + struct trace *t, **p; va_list args; void *argp; int ret; @@ -210,53 +260,82 @@ ioctl(int fd, unsigned long request, ...) argp = va_arg(args, void *); va_end(args); - ret = libc_ioctl(fd, request, argp); - if (ret) - return ret; - if (_IOC_TYPE(request) != DRM_IOCTL_BASE) - return 0; - - if (drm_fd != fd) { + goto untraced; + + for (p = &traces; (t = *p); p = &t->next) { + if (fd == t->fd) { + if (traces != t) { + *p = t->next; + t->next = traces; + traces = t; + } + break; + } + } + if (!t) { char filename[80]; if (!is_i915(fd)) - return 0; + goto untraced; - if (file) - fclose(file); + t = malloc(sizeof(*t)); + if (!t) + return -ENOMEM; - sprintf(filename, "/tmp/trace.%d", fd); - file = fopen(filename, "w+"); - drm_fd = fd; + sprintf(filename, "/tmp/trace-%d.%d", getpid(), fd); + t->file = fopen(filename, "w+"); + t->fd = fd; + + if (!fwrite(&version, sizeof(version), 1, t->file)) { + fclose(t->file); + free(t); + return -ENOMEM; + } + + t->next = traces; + traces = t; } switch (request) { case DRM_IOCTL_I915_GEM_EXECBUFFER2: - trace_exec(fd, argp); + case DRM_IOCTL_I915_GEM_EXECBUFFER2_WR: + trace_exec(t, argp); + break; + + case DRM_IOCTL_GEM_CLOSE: { + struct drm_gem_close *close = argp; + trace_del(t, close->handle); break; + } + + case DRM_IOCTL_I915_GEM_CONTEXT_DESTROY: { + struct drm_i915_gem_context_destroy *close = argp; + trace_del_context(t, close->ctx_id); + break; + } + } + ret = libc_ioctl(fd, request, argp); + if (ret) + return ret; + + switch (request) { case DRM_IOCTL_I915_GEM_CREATE: { struct drm_i915_gem_create *create = argp; - trace_add(create->handle, create->size); + trace_add(t, create->handle, create->size); break; } case DRM_IOCTL_I915_GEM_USERPTR: { struct drm_i915_gem_userptr *userptr = argp; - trace_add(userptr->handle, userptr->user_size); - break; - } - - case DRM_IOCTL_GEM_CLOSE: { - struct drm_gem_close *close = argp; - trace_del(close->handle); + trace_add(t, userptr->handle, userptr->user_size); break; } case DRM_IOCTL_GEM_OPEN: { struct drm_gem_open *open = argp; - trace_add(open->handle, open->size); + trace_add(t, open->handle, open->size); break; } @@ -264,18 +343,27 @@ ioctl(int fd, unsigned long request, ...) struct drm_prime_handle *prime = argp; off_t size = lseek(prime->fd, 0, SEEK_END); fail_if(size == -1, "failed to get prime bo size\n"); - trace_add(prime->handle, size); + trace_add(t, prime->handle, size); break; } case DRM_IOCTL_MODE_GETFB: { struct drm_mode_fb_cmd *cmd = argp; - trace_add(cmd->handle, size_for_fb(cmd)); + trace_add(t, cmd->handle, size_for_fb(cmd)); + break; + } + + case DRM_IOCTL_I915_GEM_CONTEXT_CREATE: { + struct drm_i915_gem_context_create *create = argp; + trace_add_context(t, create->ctx_id); break; } } return 0; + +untraced: + return libc_ioctl(fd, request, argp); } static void __attribute__ ((constructor)) -- cgit v1.2.3