diff options
34 files changed, 314 insertions, 2327 deletions
diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index 1f05adf3..45b923eb 100644 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -25,4 +25,4 @@ gem_latency_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) gem_latency_LDADD = $(LDADD) -lpthread gem_syslatency_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) gem_syslatency_LDADD = $(LDADD) -lpthread -gem_wsim_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la -lpthread +gem_wsim_LDADD = $(LDADD) -lpthread diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources index ee045fb3..dae3cdda 100644 --- a/benchmarks/Makefile.sources +++ b/benchmarks/Makefile.sources @@ -19,12 +19,6 @@ benchmarks_prog_list = \ vgem_mmap \ $(NULL) -gem_wsim_SOURCES = \ - gem_wsim.c \ - ewma.h \ - ilog2.h \ - $(NULL) - LIBDRM_INTEL_BENCHMARKS = \ intel_upload_blit_large \ intel_upload_blit_large_gtt \ diff --git a/benchmarks/ewma.h b/benchmarks/ewma.h deleted file mode 100644 index 8711004e..00000000 --- a/benchmarks/ewma.h +++ /dev/null @@ -1,71 +0,0 @@ -#ifndef EWMA_H -#define EWMA_H - -#include <ilog2.h> - -#define BUILD_BUG_ON(expr) -#define BUILD_BUG_ON_NOT_POWER_OF_2(expr) - -/* - * Exponentially weighted moving average (EWMA) - * - * This implements a fixed-precision EWMA algorithm, with both the - * precision and fall-off coefficient determined at compile-time - * and built into the generated helper funtions. - * - * The first argument to the macro is the name that will be used - * for the struct and helper functions. - * - * The second argument, the precision, expresses how many bits are - * used for the fractional part of the fixed-precision values. - * - * The third argument, the weight reciprocal, determines how the - * new values will be weighed vs. the old state, new values will - * get weight 1/weight_rcp and old values 1-1/weight_rcp. Note - * that this parameter must be a power of two for efficiency. 
- */ - -#define DECLARE_EWMA(T, name, _precision, _weight_rcp) \ - struct ewma_##name { \ - T internal; \ - }; \ - static inline void ewma_##name##_init(struct ewma_##name *e) \ - { \ - BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ - /* \ - * Even if you want to feed it just 0/1 you should have \ - * some bits for the non-fractional part... \ - */ \ - BUILD_BUG_ON((_precision) > 30); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ - e->internal = 0; \ - } \ - static inline T \ - ewma_##name##_read(struct ewma_##name *e) \ - { \ - BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ - BUILD_BUG_ON((_precision) > 30); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ - return e->internal >> (_precision); \ - } \ - static inline void ewma_##name##_add(struct ewma_##name *e, \ - T val) \ - { \ - const T weight_rcp = ilog2(_weight_rcp); \ - const T precision = _precision; \ - T internal = e->internal; \ - \ - BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ - BUILD_BUG_ON((_precision) > 30); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ - \ - e->internal = internal ? 
\ - (((internal << weight_rcp) - internal) + \ - (val << precision)) >> weight_rcp : \ - (val << precision); \ - } - -#endif /* EWMA_H */ diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c index ad4edb93..5cc71c56 100644 --- a/benchmarks/gem_wsim.c +++ b/benchmarks/gem_wsim.c @@ -55,7 +55,6 @@ #include "sw_sync.h" #include "i915/gem_mman.h" -#include "ewma.h" #include "i915/gem_engine_topology.h" enum intel_engine_id { @@ -154,21 +153,12 @@ struct w_step struct drm_i915_gem_execbuffer2 eb; struct drm_i915_gem_exec_object2 *obj; - struct drm_i915_gem_relocation_entry reloc[5]; + struct drm_i915_gem_relocation_entry reloc[1]; unsigned long bb_sz; uint32_t bb_handle; - uint32_t *seqno_value; - uint32_t *seqno_address; - uint32_t *rt0_value; - uint32_t *rt0_address; - uint32_t *rt1_address; - uint32_t *latch_value; - uint32_t *latch_address; uint32_t *recursive_bb_start; }; -DECLARE_EWMA(uint64_t, rt, 4, 2) - struct ctx { uint32_t id; int priority; @@ -176,9 +166,7 @@ struct ctx { enum intel_engine_id *engine_map; unsigned int bond_count; struct bond *bonds; - bool targets_instance; - bool wants_balance; - unsigned int static_vcs; + bool load_balance; uint64_t sseu; }; @@ -194,13 +182,11 @@ struct workload pthread_t thread; bool run; bool background; - const struct workload_balancer *balancer; unsigned int repeat; unsigned int flags; bool print_stats; uint32_t bb_prng; - uint32_t prng; struct timespec repeat_start; @@ -210,73 +196,25 @@ struct workload int sync_timeline; uint32_t sync_seqno; - uint32_t seqno[NUM_ENGINES]; - struct drm_i915_gem_exec_object2 status_object[2]; - uint32_t *status_page; - uint32_t *status_cs; - unsigned int vcs_rr; - - unsigned long qd_sum[NUM_ENGINES]; - unsigned long nr_bb[NUM_ENGINES]; - struct igt_list_head requests[NUM_ENGINES]; unsigned int nrequest[NUM_ENGINES]; - - struct workload *global_wrk; - const struct workload_balancer *global_balancer; - pthread_mutex_t mutex; - - union { - struct rtavg { - struct ewma_rt 
avg[NUM_ENGINES]; - uint32_t last[NUM_ENGINES]; - } rt; - }; - - struct busy_balancer { - int fd; - bool first; - unsigned int num_engines; - unsigned int engine_map[NUM_ENGINES]; - uint64_t t_prev; - uint64_t prev[NUM_ENGINES]; - double busy[NUM_ENGINES]; - } busy_balancer; }; -struct intel_mmio_data mmio_data; static const unsigned int nop_calibration_us = 1000; static bool has_nop_calibration = false; static bool sequential = true; static unsigned int master_prng; -static unsigned int context_vcs_rr; - static int verbose = 1; static int fd; static struct drm_i915_gem_context_param_sseu device_sseu = { .slice_mask = -1 /* Force read on first use. */ }; -#define SWAPVCS (1<<0) -#define SEQNO (1<<1) -#define BALANCE (1<<2) -#define RT (1<<3) -#define VCS2REMAP (1<<4) -#define INITVCSRR (1<<5) -#define SYNCEDCLIENTS (1<<6) -#define HEARTBEAT (1<<7) -#define GLOBAL_BALANCE (1<<8) -#define DEPSYNC (1<<9) -#define I915 (1<<10) -#define SSEU (1<<11) - -#define SEQNO_IDX(engine) ((engine) * 16) -#define SEQNO_OFFSET(engine) (SEQNO_IDX(engine) * sizeof(uint32_t)) - -#define RCS_TIMESTAMP (0x2000 + 0x358) -#define REG(x) (volatile uint32_t *)((volatile char *)igt_global_mmio + x) +#define SYNCEDCLIENTS (1<<1) +#define DEPSYNC (1<<2) +#define SSEU (1<<3) static const char *ring_str_map[NUM_ENGINES] = { [DEFAULT] = "DEFAULT", @@ -579,26 +517,6 @@ static unsigned int num_engines_in_class(enum intel_engine_id class) } static void -fill_engines_class(struct i915_engine_class_instance *ci, - enum intel_engine_id class) -{ - unsigned int i, j = 0; - - igt_assert(class == VCS); - - query_engines(); - - for (i = 0; i < __num_engines; i++) { - if (__engines[i].engine_class != I915_ENGINE_CLASS_VIDEO) - continue; - - ci[j].engine_class = __engines[i].engine_class; - ci[j].engine_instance = __engines[i].engine_instance; - j++; - } -} - -static void fill_engines_id_class(enum intel_engine_id *list, enum intel_engine_id class) { @@ -744,7 +662,6 @@ parse_workload(struct w_arg *arg, 
unsigned int flags, struct workload *app_w) char *_token, *token, *tctx = NULL, *tstart = desc; char *field, *fctx = NULL, *fstart; struct w_step step, *steps = NULL; - bool bcs_used = false; unsigned int valid; int i, j, tmp; @@ -962,9 +879,6 @@ parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w) valid++; step.engine = i; - - if (step.engine == BCS) - bcs_used = true; } if ((field = strtok_r(fstart, ".", &fctx))) { @@ -1089,9 +1003,6 @@ add_step: } } - if (bcs_used && (flags & VCS2REMAP) && verbose) - printf("BCS usage in workload with VCS2 remapping enabled!\n"); - return wrk; } @@ -1147,7 +1058,7 @@ static unsigned int get_duration(struct workload *wrk, struct w_step *w) static struct ctx * __get_ctx(struct workload *wrk, const struct w_step *w) { - return &wrk->ctx_list[w->context * 2]; + return &wrk->ctx_list[w->context]; } static unsigned long @@ -1179,8 +1090,7 @@ get_bb_sz(const struct w_step *w, unsigned int duration) return d; } -static void -init_bb(struct w_step *w, unsigned int flags) +static void init_bb(struct w_step *w) { const unsigned int arb_period = __get_bb_sz(w, w->preempt_us) / sizeof(uint32_t); @@ -1202,8 +1112,7 @@ init_bb(struct w_step *w, unsigned int flags) munmap(ptr, mmap_len); } -static unsigned int -terminate_bb(struct w_step *w, unsigned int flags) +static unsigned int terminate_bb(struct w_step *w) { const uint32_t bbe = 0xa << 23; unsigned long mmap_start, mmap_len; @@ -1211,13 +1120,7 @@ terminate_bb(struct w_step *w, unsigned int flags) unsigned int r = 0; uint32_t *ptr, *cs; - igt_assert(((flags & RT) && (flags & SEQNO)) || !(flags & RT)); - batch_start -= sizeof(uint32_t); /* bbend */ - if (flags & SEQNO) - batch_start -= 4 * sizeof(uint32_t); - if (flags & RT) - batch_start -= 12 * sizeof(uint32_t); if (w->unbound_duration) batch_start -= 4 * sizeof(uint32_t); /* MI_ARB_CHK + MI_BATCH_BUFFER_START */ @@ -1242,49 +1145,6 @@ terminate_bb(struct w_step *w, unsigned int flags) *cs++ = 0; } - if (flags & 
SEQNO) { - w->reloc[r++].offset = batch_start + sizeof(uint32_t); - batch_start += 4 * sizeof(uint32_t); - - *cs++ = MI_STORE_DWORD_IMM; - w->seqno_address = cs; - *cs++ = 0; - *cs++ = 0; - w->seqno_value = cs; - *cs++ = 0; - } - - if (flags & RT) { - w->reloc[r++].offset = batch_start + sizeof(uint32_t); - batch_start += 4 * sizeof(uint32_t); - - *cs++ = MI_STORE_DWORD_IMM; - w->rt0_address = cs; - *cs++ = 0; - *cs++ = 0; - w->rt0_value = cs; - *cs++ = 0; - - w->reloc[r++].offset = batch_start + 2 * sizeof(uint32_t); - batch_start += 4 * sizeof(uint32_t); - - *cs++ = 0x24 << 23 | 2; /* MI_STORE_REG_MEM */ - *cs++ = RCS_TIMESTAMP; - w->rt1_address = cs; - *cs++ = 0; - *cs++ = 0; - - w->reloc[r++].offset = batch_start + sizeof(uint32_t); - batch_start += 4 * sizeof(uint32_t); - - *cs++ = MI_STORE_DWORD_IMM; - w->latch_address = cs; - *cs++ = 0; - *cs++ = 0; - w->latch_value = cs; - *cs++ = 0; - } - *cs = bbe; return r; @@ -1301,17 +1161,9 @@ static const unsigned int eb_engine_map[NUM_ENGINES] = { }; static void -eb_set_engine(struct drm_i915_gem_execbuffer2 *eb, - enum intel_engine_id engine, - unsigned int flags) +eb_set_engine(struct drm_i915_gem_execbuffer2 *eb, enum intel_engine_id engine) { - if (engine == VCS2 && (flags & VCS2REMAP)) - engine = BCS; - - if ((flags & I915) && engine == VCS) - eb->flags = 0; - else - eb->flags = eb_engine_map[engine]; + eb->flags = eb_engine_map[engine]; } static unsigned int @@ -1324,20 +1176,20 @@ find_engine_in_map(struct ctx *ctx, enum intel_engine_id engine) return i + 1; } - igt_assert(ctx->wants_balance); + igt_assert(ctx->load_balance); return 0; } static void eb_update_flags(struct workload *wrk, struct w_step *w, - enum intel_engine_id engine, unsigned int flags) + enum intel_engine_id engine) { struct ctx *ctx = __get_ctx(wrk, w); if (ctx->engine_map) w->eb.flags = find_engine_in_map(ctx, engine); else - eb_set_engine(&w->eb, engine, flags); + eb_set_engine(&w->eb, engine); w->eb.flags |= I915_EXEC_HANDLE_LUT; 
w->eb.flags |= I915_EXEC_NO_RELOC; @@ -1347,32 +1199,18 @@ eb_update_flags(struct workload *wrk, struct w_step *w, w->eb.flags |= I915_EXEC_FENCE_OUT; } -static struct drm_i915_gem_exec_object2 * -get_status_objects(struct workload *wrk) -{ - if (wrk->flags & GLOBAL_BALANCE) - return wrk->global_wrk->status_object; - else - return wrk->status_object; -} - static uint32_t get_ctxid(struct workload *wrk, struct w_step *w) { - struct ctx *ctx = __get_ctx(wrk, w); - - if (ctx->targets_instance && ctx->wants_balance && w->engine == VCS) - return wrk->ctx_list[w->context * 2 + 1].id; - else - return wrk->ctx_list[w->context * 2].id; + return wrk->ctx_list[w->context].id; } static void -alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags) +alloc_step_batch(struct workload *wrk, struct w_step *w) { enum intel_engine_id engine = w->engine; unsigned int j = 0; - unsigned int nr_obj = 3 + w->data_deps.nr; + unsigned int nr_obj = 2 + w->data_deps.nr; unsigned int i; w->obj = calloc(nr_obj, sizeof(*w->obj)); @@ -1383,11 +1221,6 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags) j++; igt_assert(j < nr_obj); - if (flags & SEQNO) { - w->obj[j++] = get_status_objects(wrk)[0]; - igt_assert(j < nr_obj); - } - for (i = 0; i < w->data_deps.nr; i++) { igt_assert(w->data_deps.list[i] <= 0); if (w->data_deps.list[i]) { @@ -1410,26 +1243,20 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags) w->bb_sz = get_bb_sz(w, w->duration.max); w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz + (w->unbound_duration ? 
4096 : 0)); - init_bb(w, flags); - w->obj[j].relocation_count = terminate_bb(w, flags); + init_bb(w); + w->obj[j].relocation_count = terminate_bb(w); if (w->obj[j].relocation_count) { + igt_assert(w->unbound_duration); w->obj[j].relocs_ptr = to_user_pointer(&w->reloc); - for (i = 0; i < w->obj[j].relocation_count; i++) - w->reloc[i].target_handle = 1; - if (w->unbound_duration) - w->reloc[0].target_handle = j; + w->reloc[0].target_handle = j; } w->eb.buffers_ptr = to_user_pointer(w->obj); w->eb.buffer_count = j + 1; w->eb.rsvd1 = get_ctxid(wrk, w); - if (flags & SWAPVCS && engine == VCS1) - engine = VCS2; - else if (flags & SWAPVCS && engine == VCS2) - engine = VCS1; - eb_update_flags(wrk, w, engine, flags); + eb_update_flags(wrk, w, engine); #ifdef DEBUG printf("%u: %u:|", w->idx, w->eb.buffer_count); for (i = 0; i <= j; i++) @@ -1528,7 +1355,7 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask) if (slice_mask == -1) slice_mask = device_sseu.slice_mask; - if (ctx->engine_map && ctx->wants_balance) { + if (ctx->engine_map && ctx->load_balance) { sseu.flags = I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX; sseu.engine.engine_class = I915_ENGINE_CLASS_INVALID; sseu.engine.engine_instance = 0; @@ -1566,51 +1393,22 @@ static size_t sizeof_engines_bond(int count) #define alloca0(sz) ({ size_t sz__ = (sz); memset(alloca(sz__), 0, sz__); }) -static int -prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) +static int prepare_workload(unsigned int id, struct workload *wrk) { - unsigned int ctx_vcs; + uint32_t share_vm = 0; int max_ctx = -1; struct w_step *w; int i, j; wrk->id = id; - wrk->prng = rand(); wrk->bb_prng = (wrk->flags & SYNCEDCLIENTS) ? 
master_prng : rand(); wrk->run = true; - ctx_vcs = 0; - if (flags & INITVCSRR) - ctx_vcs = id & 1; - wrk->vcs_rr = ctx_vcs; - - if (flags & GLOBAL_BALANCE) { - int ret = pthread_mutex_init(&wrk->mutex, NULL); - igt_assert(ret == 0); - } - - if (flags & SEQNO) { - if (!(flags & GLOBAL_BALANCE) || id == 0) { - uint32_t handle; - - handle = gem_create(fd, 4096); - gem_set_caching(fd, handle, I915_CACHING_CACHED); - wrk->status_object[0].handle = handle; - wrk->status_page = gem_mmap__cpu(fd, handle, 0, 4096, - PROT_READ); - - handle = gem_create(fd, 4096); - wrk->status_object[1].handle = handle; - wrk->status_cs = gem_mmap__wc(fd, handle, - 0, 4096, PROT_WRITE); - } - } - /* * Pre-scan workload steps to allocate context list storage. */ for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { - int ctx = w->context * 2 + 1; /* Odd slots are special. */ + int ctx = w->context + 1; int delta; w->wrk = wrk; @@ -1630,27 +1428,16 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) } /* - * Identify if contexts target specific engine instances and if they - * want to be balanced. - * * Transfer over engine map configuration from the workload step. 
*/ - for (j = 0; j < wrk->nr_ctxs; j += 2) { + for (j = 0; j < wrk->nr_ctxs; j++) { struct ctx *ctx = &wrk->ctx_list[j]; - bool targets = false; - bool balance = false; - for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { - if (w->context != (j / 2)) + if (w->context != j) continue; - if (w->type == BATCH) { - if (w->engine == VCS) - balance = true; - else - targets = true; - } else if (w->type == ENGINE_MAP) { + if (w->type == ENGINE_MAP) { ctx->engine_map = w->engine_map; ctx->engine_map_count = w->engine_map_count; } else if (w->type == LOAD_BALANCE) { @@ -1658,9 +1445,9 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) wsim_err("Load balancing needs an engine map!\n"); return 1; } - ctx->wants_balance = w->load_balance; + ctx->load_balance = w->load_balance; } else if (w->type == BOND) { - if (!ctx->wants_balance) { + if (!ctx->load_balance) { wsim_err("Engine bonds need load balancing engine map!\n"); return 1; } @@ -1675,133 +1462,53 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) w->bond_master; } } - - wrk->ctx_list[j].targets_instance = targets; - if (flags & I915) - wrk->ctx_list[j].wants_balance |= balance; - } - - /* - * Ensure VCS is not allowed with engine map contexts. - */ - for (j = 0; j < wrk->nr_ctxs; j += 2) { - for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { - if (w->context != (j / 2)) - continue; - - if (w->type != BATCH) - continue; - - if (wrk->ctx_list[j].engine_map && - !wrk->ctx_list[j].wants_balance && - (w->engine == VCS || w->engine == DEFAULT)) { - wsim_err("Batches targetting engine maps must use explicit engines!\n"); - return -1; - } - } } - /* * Create and configure contexts. 
*/ - for (i = 0; i < wrk->nr_ctxs; i += 2) { + for (i = 0; i < wrk->nr_ctxs; i++) { + struct drm_i915_gem_context_create_ext_setparam ext = { + .base.name = I915_CONTEXT_CREATE_EXT_SETPARAM, + .param.param = I915_CONTEXT_PARAM_VM, + }; + struct drm_i915_gem_context_create_ext args = { }; struct ctx *ctx = &wrk->ctx_list[i]; - uint32_t ctx_id, share_vm = 0; + uint32_t ctx_id; - if (ctx->id) - continue; + igt_assert(!ctx->id); - if ((flags & I915) || ctx->engine_map) { - struct drm_i915_gem_context_create_ext_setparam ext = { - .base.name = I915_CONTEXT_CREATE_EXT_SETPARAM, - .param.param = I915_CONTEXT_PARAM_VM, + /* Find existing context to share ppgtt with. */ + for (j = 0; !share_vm && j < wrk->nr_ctxs; j++) { + struct drm_i915_gem_context_param param = { + .param = I915_CONTEXT_PARAM_VM, + .ctx_id = wrk->ctx_list[j].id, }; - struct drm_i915_gem_context_create_ext args = { }; - - /* Find existing context to share ppgtt with. */ - for (j = 0; j < wrk->nr_ctxs; j++) { - struct drm_i915_gem_context_param param = { - .param = I915_CONTEXT_PARAM_VM, - }; - - if (!wrk->ctx_list[j].id) - continue; - - param.ctx_id = wrk->ctx_list[j].id; - gem_context_get_param(fd, &param); - igt_assert(param.value); - - share_vm = param.value; - - ext.param.value = share_vm; - args.flags = - I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS; - args.extensions = to_user_pointer(&ext); - break; - } - - if ((!ctx->engine_map && !ctx->targets_instance) || - (ctx->engine_map && ctx->wants_balance)) - args.flags |= - I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE; - - drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, - &args); + if (!param.ctx_id) + continue; - ctx_id = args.ctx_id; - } else { - struct drm_i915_gem_context_create args = {}; + gem_context_get_param(fd, &param); + igt_assert(param.value); + share_vm = param.value; + break; + } - drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &args); - ctx_id = args.ctx_id; + if (share_vm) { + ext.param.value = share_vm; + args.flags = 
I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS; + args.extensions = to_user_pointer(&ext); } - igt_assert(ctx_id); + drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &args); + igt_assert(args.ctx_id); + + ctx_id = args.ctx_id; ctx->id = ctx_id; ctx->sseu = device_sseu.slice_mask; - if (flags & GLOBAL_BALANCE) { - ctx->static_vcs = context_vcs_rr; - context_vcs_rr ^= 1; - } else { - ctx->static_vcs = ctx_vcs; - ctx_vcs ^= 1; - } - __configure_context(ctx_id, wrk->prio); - /* - * Do we need a separate context to satisfy this workloads which - * both want to target specific engines and be balanced by i915? - */ - if ((flags & I915) && ctx->wants_balance && - ctx->targets_instance && !ctx->engine_map) { - struct drm_i915_gem_context_create_ext_setparam ext = { - .base.name = I915_CONTEXT_CREATE_EXT_SETPARAM, - .param.param = I915_CONTEXT_PARAM_VM, - .param.value = share_vm, - }; - struct drm_i915_gem_context_create_ext args = { - .extensions = to_user_pointer(&ext), - .flags = - I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS | - I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE, - }; - - igt_assert(share_vm); - - drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, - &args); - - igt_assert(args.ctx_id); - ctx_id = args.ctx_id; - wrk->ctx_list[i + 1].id = args.ctx_id; - - __configure_context(ctx_id, wrk->prio); - } - if (ctx->engine_map) { struct i915_context_param_engines *set_engines = alloca0(sizeof_param_engines(ctx->engine_map_count + 1)); @@ -1815,7 +1522,7 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) }; struct i915_context_engines_bond *last = NULL; - if (ctx->wants_balance) { + if (ctx->load_balance) { set_engines->extensions = to_user_pointer(load_balance); @@ -1870,45 +1577,17 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) load_balance->base.next_extension = to_user_pointer(last); gem_context_set_param(fd, &param); - } else if (ctx->wants_balance) { - const unsigned int count = num_engines_in_class(VCS); - struct 
i915_context_engines_load_balance *load_balance = - alloca0(sizeof_load_balance(count)); - struct i915_context_param_engines *set_engines = - alloca0(sizeof_param_engines(count + 1)); - struct drm_i915_gem_context_param param = { - .ctx_id = ctx_id, - .param = I915_CONTEXT_PARAM_ENGINES, - .size = sizeof_param_engines(count + 1), - .value = to_user_pointer(set_engines), - }; - - set_engines->extensions = to_user_pointer(load_balance); - - set_engines->engines[0].engine_class = - I915_ENGINE_CLASS_INVALID; - set_engines->engines[0].engine_instance = - I915_ENGINE_CLASS_INVALID_NONE; - fill_engines_class(&set_engines->engines[1], VCS); - - load_balance->base.name = - I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE; - load_balance->num_siblings = count; - - fill_engines_class(&load_balance->engines[0], VCS); - - gem_context_set_param(fd, &param); } if (wrk->sseu) { /* Set to slice 0 only, one slice. */ ctx->sseu = set_ctx_sseu(ctx, 1); } - - if (share_vm) - vm_destroy(fd, share_vm); } + if (share_vm) + vm_destroy(fd, share_vm); + /* Record default preemption. */ for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { if (w->type == BATCH) @@ -1954,16 +1633,10 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) * Allocate batch buffers. 
*/ for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { - unsigned int _flags = flags; - enum intel_engine_id engine = w->engine; - if (w->type != BATCH) continue; - if (engine == VCS) - _flags &= ~SWAPVCS; - - alloc_step_batch(wrk, w, _flags); + alloc_step_batch(wrk, w); } return 0; @@ -1980,602 +1653,6 @@ static int elapsed_us(const struct timespec *start, const struct timespec *end) return elapsed(start, end) * 1e6; } -static enum intel_engine_id get_vcs_engine(unsigned int n) -{ - const enum intel_engine_id vcs_engines[2] = { VCS1, VCS2 }; - - igt_assert(n < ARRAY_SIZE(vcs_engines)); - - return vcs_engines[n]; -} - -static uint32_t new_seqno(struct workload *wrk, enum intel_engine_id engine) -{ - uint32_t seqno; - int ret; - - if (wrk->flags & GLOBAL_BALANCE) { - igt_assert(wrk->global_wrk); - wrk = wrk->global_wrk; - - ret = pthread_mutex_lock(&wrk->mutex); - igt_assert(ret == 0); - } - - seqno = ++wrk->seqno[engine]; - - if (wrk->flags & GLOBAL_BALANCE) { - ret = pthread_mutex_unlock(&wrk->mutex); - igt_assert(ret == 0); - } - - return seqno; -} - -static uint32_t -current_seqno(struct workload *wrk, enum intel_engine_id engine) -{ - if (wrk->flags & GLOBAL_BALANCE) - return wrk->global_wrk->seqno[engine]; - else - return wrk->seqno[engine]; -} - -static uint32_t -read_status_page(struct workload *wrk, unsigned int idx) -{ - if (wrk->flags & GLOBAL_BALANCE) - return READ_ONCE(wrk->global_wrk->status_page[idx]); - else - return READ_ONCE(wrk->status_page[idx]); -} - -static uint32_t -current_gpu_seqno(struct workload *wrk, enum intel_engine_id engine) -{ - return read_status_page(wrk, SEQNO_IDX(engine)); -} - -struct workload_balancer { - unsigned int id; - const char *name; - const char *desc; - unsigned int flags; - unsigned int min_gen; - - int (*init)(const struct workload_balancer *balancer, - struct workload *wrk); - unsigned int (*get_qd)(const struct workload_balancer *balancer, - struct workload *wrk, - enum intel_engine_id engine); - enum 
intel_engine_id (*balance)(const struct workload_balancer *balancer, - struct workload *wrk, struct w_step *w); -}; - -static enum intel_engine_id -rr_balance(const struct workload_balancer *balancer, - struct workload *wrk, struct w_step *w) -{ - unsigned int engine; - - engine = get_vcs_engine(wrk->vcs_rr); - wrk->vcs_rr ^= 1; - - return engine; -} - -static enum intel_engine_id -rand_balance(const struct workload_balancer *balancer, - struct workload *wrk, struct w_step *w) -{ - return get_vcs_engine(hars_petruska_f54_1_random(&wrk->prng) & 1); -} - -static unsigned int -get_qd_depth(const struct workload_balancer *balancer, - struct workload *wrk, enum intel_engine_id engine) -{ - return current_seqno(wrk, engine) - current_gpu_seqno(wrk, engine); -} - -static enum intel_engine_id -__qd_select_engine(struct workload *wrk, const unsigned long *qd, bool random) -{ - unsigned int n; - - if (qd[VCS1] < qd[VCS2]) - n = 0; - else if (qd[VCS1] > qd[VCS2]) - n = 1; - else if (random) - n = hars_petruska_f54_1_random(&wrk->prng) & 1; - else - n = wrk->vcs_rr; - wrk->vcs_rr = n ^ 1; - - return get_vcs_engine(n); -} - -static enum intel_engine_id -__qd_balance(const struct workload_balancer *balancer, - struct workload *wrk, struct w_step *w, bool random) -{ - enum intel_engine_id engine; - unsigned long qd[NUM_ENGINES]; - - igt_assert(w->engine == VCS); - - qd[VCS1] = balancer->get_qd(balancer, wrk, VCS1); - wrk->qd_sum[VCS1] += qd[VCS1]; - - qd[VCS2] = balancer->get_qd(balancer, wrk, VCS2); - wrk->qd_sum[VCS2] += qd[VCS2]; - - engine = __qd_select_engine(wrk, qd, random); - -#ifdef DEBUG - printf("qd_balance[%u]: 1:%ld 2:%ld rr:%u = %u\t(%u - %u) (%u - %u)\n", - wrk->id, qd[VCS1], qd[VCS2], wrk->vcs_rr, engine, - current_seqno(wrk, VCS1), current_gpu_seqno(wrk, VCS1), - current_seqno(wrk, VCS2), current_gpu_seqno(wrk, VCS2)); -#endif - return engine; -} - -static enum intel_engine_id -qd_balance(const struct workload_balancer *balancer, - struct workload *wrk, struct 
w_step *w) -{ - return __qd_balance(balancer, wrk, w, false); -} - -static enum intel_engine_id -qdr_balance(const struct workload_balancer *balancer, - struct workload *wrk, struct w_step *w) -{ - return __qd_balance(balancer, wrk, w, true); -} - -static enum intel_engine_id -qdavg_balance(const struct workload_balancer *balancer, - struct workload *wrk, struct w_step *w) -{ - unsigned long qd[NUM_ENGINES]; - unsigned int engine; - - igt_assert(w->engine == VCS); - - for (engine = VCS1; engine <= VCS2; engine++) { - qd[engine] = balancer->get_qd(balancer, wrk, engine); - wrk->qd_sum[engine] += qd[engine]; - - ewma_rt_add(&wrk->rt.avg[engine], qd[engine]); - qd[engine] = ewma_rt_read(&wrk->rt.avg[engine]); - } - - engine = __qd_select_engine(wrk, qd, false); -#ifdef DEBUG - printf("qdavg_balance[%u]: 1:%ld 2:%ld rr:%u = %u\t(%u - %u) (%u - %u)\n", - wrk->id, qd[VCS1], qd[VCS2], wrk->vcs_rr, engine, - current_seqno(wrk, VCS1), current_gpu_seqno(wrk, VCS1), - current_seqno(wrk, VCS2), current_gpu_seqno(wrk, VCS2)); -#endif - return engine; -} - -static enum intel_engine_id -__rt_select_engine(struct workload *wrk, unsigned long *qd, bool random) -{ - qd[VCS1] >>= 10; - qd[VCS2] >>= 10; - - return __qd_select_engine(wrk, qd, random); -} - -struct rt_depth { - uint32_t seqno; - uint32_t submitted; - uint32_t completed; -}; - -static void get_rt_depth(struct workload *wrk, - unsigned int engine, - struct rt_depth *rt) -{ - const unsigned int idx = SEQNO_IDX(engine); - uint32_t latch; - - do { - latch = read_status_page(wrk, idx + 3); - rt->submitted = read_status_page(wrk, idx + 1); - rt->completed = read_status_page(wrk, idx + 2); - rt->seqno = read_status_page(wrk, idx); - } while (latch != rt->seqno); -} - -static enum intel_engine_id -__rt_balance(const struct workload_balancer *balancer, - struct workload *wrk, struct w_step *w, bool random) -{ - unsigned long qd[NUM_ENGINES]; - unsigned int engine; - - igt_assert(w->engine == VCS); - - /* Estimate the "speed" of 
the most recent batch - * (finish time - submit time) - * and use that as an approximate for the total remaining time for - * all batches on that engine, plus the time we expect this batch to - * take. We try to keep the total balanced between the engines. - */ - for (engine = VCS1; engine <= VCS2; engine++) { - struct rt_depth rt; - - get_rt_depth(wrk, engine, &rt); - qd[engine] = current_seqno(wrk, engine) - rt.seqno; - wrk->qd_sum[engine] += qd[engine]; - qd[engine] = (qd[engine] + 1) * (rt.completed - rt.submitted); -#ifdef DEBUG - printf("rt[0] = %d (%d - %d) x %d (%d - %d) = %ld\n", - current_seqno(wrk, engine) - rt.seqno, - current_seqno(wrk, engine), rt.seqno, - rt.completed - rt.submitted, - rt.completed, rt.submitted, - qd[engine]); -#endif - } - - return __rt_select_engine(wrk, qd, random); -} - -static enum intel_engine_id -rt_balance(const struct workload_balancer *balancer, - struct workload *wrk, struct w_step *w) -{ - - return __rt_balance(balancer, wrk, w, false); -} - -static enum intel_engine_id -rtr_balance(const struct workload_balancer *balancer, - struct workload *wrk, struct w_step *w) -{ - return __rt_balance(balancer, wrk, w, true); -} - -static enum intel_engine_id -rtavg_balance(const struct workload_balancer *balancer, - struct workload *wrk, struct w_step *w) -{ - unsigned long qd[NUM_ENGINES]; - unsigned int engine; - - igt_assert(w->engine == VCS); - - /* Estimate the average "speed" of the most recent batches - * (finish time - submit time) - * and use that as an approximate for the total remaining time for - * all batches on that engine plus the time we expect to execute in. - * We try to keep the total remaining balanced between the engines. 
- */ - for (engine = VCS1; engine <= VCS2; engine++) { - struct rt_depth rt; - - get_rt_depth(wrk, engine, &rt); - if (rt.seqno != wrk->rt.last[engine]) { - igt_assert((long)(rt.completed - rt.submitted) > 0); - ewma_rt_add(&wrk->rt.avg[engine], - rt.completed - rt.submitted); - wrk->rt.last[engine] = rt.seqno; - } - qd[engine] = current_seqno(wrk, engine) - rt.seqno; - wrk->qd_sum[engine] += qd[engine]; - qd[engine] = - (qd[engine] + 1) * ewma_rt_read(&wrk->rt.avg[engine]); - -#ifdef DEBUG - printf("rtavg[%d] = %d (%d - %d) x %ld (%d) = %ld\n", - engine, - current_seqno(wrk, engine) - rt.seqno, - current_seqno(wrk, engine), rt.seqno, - ewma_rt_read(&wrk->rt.avg[engine]), - rt.completed - rt.submitted, - qd[engine]); -#endif - } - - return __rt_select_engine(wrk, qd, false); -} - -static enum intel_engine_id -context_balance(const struct workload_balancer *balancer, - struct workload *wrk, struct w_step *w) -{ - return get_vcs_engine(__get_ctx(wrk, w)->static_vcs); -} - -static unsigned int -get_engine_busy(const struct workload_balancer *balancer, - struct workload *wrk, enum intel_engine_id engine) -{ - struct busy_balancer *bb = &wrk->busy_balancer; - - if (engine == VCS2 && (wrk->flags & VCS2REMAP)) - engine = BCS; - - return bb->busy[bb->engine_map[engine]]; -} - -static void -get_pmu_stats(const struct workload_balancer *b, struct workload *wrk) -{ - struct busy_balancer *bb = &wrk->busy_balancer; - uint64_t val[7]; - unsigned int i; - - igt_assert_eq(read(bb->fd, val, sizeof(val)), - (2 + bb->num_engines) * sizeof(uint64_t)); - - if (!bb->first) { - for (i = 0; i < bb->num_engines; i++) { - double d; - - d = (val[2 + i] - bb->prev[i]) * 100; - d /= val[1] - bb->t_prev; - bb->busy[i] = d; - } - } - - for (i = 0; i < bb->num_engines; i++) - bb->prev[i] = val[2 + i]; - - bb->t_prev = val[1]; - bb->first = false; -} - -static enum intel_engine_id -busy_avg_balance(const struct workload_balancer *balancer, - struct workload *wrk, struct w_step *w) -{ - 
get_pmu_stats(balancer, wrk); - - return qdavg_balance(balancer, wrk, w); -} - -static enum intel_engine_id -busy_balance(const struct workload_balancer *balancer, - struct workload *wrk, struct w_step *w) -{ - get_pmu_stats(balancer, wrk); - - return qd_balance(balancer, wrk, w); -} - -static int -busy_init(const struct workload_balancer *balancer, struct workload *wrk) -{ - struct busy_balancer *bb = &wrk->busy_balancer; - struct engine_desc { - unsigned class, inst; - enum intel_engine_id id; - } *d, engines[] = { - { I915_ENGINE_CLASS_RENDER, 0, RCS }, - { I915_ENGINE_CLASS_COPY, 0, BCS }, - { I915_ENGINE_CLASS_VIDEO, 0, VCS1 }, - { I915_ENGINE_CLASS_VIDEO, 1, VCS2 }, - { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, VECS }, - { 0, 0, VCS } - }; - - bb->num_engines = 0; - bb->first = true; - bb->fd = -1; - - for (d = &engines[0]; d->id != VCS; d++) { - int pfd; - - pfd = perf_igfx_open_group(I915_PMU_ENGINE_BUSY(d->class, - d->inst), - bb->fd); - if (pfd < 0) { - if (d->id != VCS2) - return -(10 + bb->num_engines); - else - continue; - } - - if (bb->num_engines == 0) - bb->fd = pfd; - - bb->engine_map[d->id] = bb->num_engines++; - } - - if (bb->num_engines < 5 && !(wrk->flags & VCS2REMAP)) - return -1; - - return 0; -} - -static const struct workload_balancer all_balancers[] = { - { - .id = 0, - .name = "rr", - .desc = "Simple round-robin.", - .balance = rr_balance, - }, - { - .id = 6, - .name = "rand", - .desc = "Random selection.", - .balance = rand_balance, - }, - { - .id = 1, - .name = "qd", - .desc = "Queue depth estimation with round-robin on equal depth.", - .flags = SEQNO, - .min_gen = 8, - .get_qd = get_qd_depth, - .balance = qd_balance, - }, - { - .id = 5, - .name = "qdr", - .desc = "Queue depth estimation with random selection on equal depth.", - .flags = SEQNO, - .min_gen = 8, - .get_qd = get_qd_depth, - .balance = qdr_balance, - }, - { - .id = 7, - .name = "qdavg", - .desc = "Like qd, but using an average queue depth estimator.", - .flags = SEQNO, - .min_gen 
= 8, - .get_qd = get_qd_depth, - .balance = qdavg_balance, - }, - { - .id = 2, - .name = "rt", - .desc = "Queue depth plus last runtime estimation.", - .flags = SEQNO | RT, - .min_gen = 8, - .get_qd = get_qd_depth, - .balance = rt_balance, - }, - { - .id = 3, - .name = "rtr", - .desc = "Like rt but with random engine selection on equal depth.", - .flags = SEQNO | RT, - .min_gen = 8, - .get_qd = get_qd_depth, - .balance = rtr_balance, - }, - { - .id = 4, - .name = "rtavg", - .desc = "Improved version rt tracking average execution speed per engine.", - .flags = SEQNO | RT, - .min_gen = 8, - .get_qd = get_qd_depth, - .balance = rtavg_balance, - }, - { - .id = 8, - .name = "context", - .desc = "Static round-robin VCS assignment at context creation.", - .balance = context_balance, - }, - { - .id = 9, - .name = "busy", - .desc = "Engine busyness based balancing.", - .init = busy_init, - .get_qd = get_engine_busy, - .balance = busy_balance, - }, - { - .id = 10, - .name = "busy-avg", - .desc = "Average engine busyness based balancing.", - .init = busy_init, - .get_qd = get_engine_busy, - .balance = busy_avg_balance, - }, - { - .id = 11, - .name = "i915", - .desc = "i915 balancing.", - .flags = I915, - }, -}; - -static unsigned int -global_get_qd(const struct workload_balancer *balancer, - struct workload *wrk, enum intel_engine_id engine) -{ - igt_assert(wrk->global_wrk); - igt_assert(wrk->global_balancer); - - return wrk->global_balancer->get_qd(wrk->global_balancer, - wrk->global_wrk, engine); -} - -static enum intel_engine_id -global_balance(const struct workload_balancer *balancer, - struct workload *wrk, struct w_step *w) -{ - enum intel_engine_id engine; - int ret; - - igt_assert(wrk->global_wrk); - igt_assert(wrk->global_balancer); - - wrk = wrk->global_wrk; - - ret = pthread_mutex_lock(&wrk->mutex); - igt_assert(ret == 0); - - engine = wrk->global_balancer->balance(wrk->global_balancer, wrk, w); - - ret = pthread_mutex_unlock(&wrk->mutex); - igt_assert(ret == 0); - 
- return engine; -} - -static const struct workload_balancer global_balancer = { - .id = ~0, - .name = "global", - .desc = "Global balancer", - .get_qd = global_get_qd, - .balance = global_balance, - }; - -static void -update_bb_seqno(struct w_step *w, enum intel_engine_id engine, uint32_t seqno) -{ - gem_set_domain(fd, w->bb_handle, - I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC); - - w->reloc[0].delta = SEQNO_OFFSET(engine); - - *w->seqno_value = seqno; - *w->seqno_address = w->reloc[0].presumed_offset + w->reloc[0].delta; - - /* If not using NO_RELOC, force the relocations */ - if (!(w->eb.flags & I915_EXEC_NO_RELOC)) - w->reloc[0].presumed_offset = -1; -} - -static void -update_bb_rt(struct w_step *w, enum intel_engine_id engine, uint32_t seqno) -{ - gem_set_domain(fd, w->bb_handle, - I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC); - - w->reloc[1].delta = SEQNO_OFFSET(engine) + sizeof(uint32_t); - w->reloc[2].delta = SEQNO_OFFSET(engine) + 2 * sizeof(uint32_t); - w->reloc[3].delta = SEQNO_OFFSET(engine) + 3 * sizeof(uint32_t); - - *w->latch_value = seqno; - *w->latch_address = w->reloc[3].presumed_offset + w->reloc[3].delta; - - *w->rt0_value = *REG(RCS_TIMESTAMP); - *w->rt0_address = w->reloc[1].presumed_offset + w->reloc[1].delta; - *w->rt1_address = w->reloc[2].presumed_offset + w->reloc[2].delta; - - /* If not using NO_RELOC, force the relocations */ - if (!(w->eb.flags & I915_EXEC_NO_RELOC)) { - w->reloc[1].presumed_offset = -1; - w->reloc[2].presumed_offset = -1; - w->reloc[3].presumed_offset = -1; - } -} - static void update_bb_start(struct w_step *w) { @@ -2606,123 +1683,12 @@ static void w_sync_to(struct workload *wrk, struct w_step *w, int target) gem_sync(fd, wrk->steps[target].obj[0].handle); } -static uint32_t *get_status_cs(struct workload *wrk) -{ - return wrk->status_cs; -} - -#define INIT_CLOCKS 0x1 -#define INIT_ALL (INIT_CLOCKS) -static void init_status_page(struct workload *wrk, unsigned int flags) -{ - struct drm_i915_gem_relocation_entry reloc[4] = {}; 
- struct drm_i915_gem_exec_object2 *status_object = - get_status_objects(wrk); - struct drm_i915_gem_execbuffer2 eb = { - .buffer_count = ARRAY_SIZE(wrk->status_object), - .buffers_ptr = to_user_pointer(status_object) - }; - uint32_t *base = get_status_cs(wrk); - - /* Want to make sure that the balancer has a reasonable view of - * the background busyness of each engine. To do that we occasionally - * send a dummy batch down the pipeline. - */ - - if (!base) - return; - - gem_set_domain(fd, status_object[1].handle, - I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC); - - status_object[1].relocs_ptr = to_user_pointer(reloc); - status_object[1].relocation_count = 2; - if (flags & INIT_CLOCKS) - status_object[1].relocation_count += 2; - - for (int engine = 0; engine < NUM_ENGINES; engine++) { - struct drm_i915_gem_relocation_entry *r = reloc; - uint64_t presumed_offset = status_object[0].offset; - uint32_t offset = engine * 128; - uint32_t *cs = base + offset / sizeof(*cs); - uint64_t addr; - - r->offset = offset + sizeof(uint32_t); - r->delta = SEQNO_OFFSET(engine); - r->presumed_offset = presumed_offset; - addr = presumed_offset + r->delta; - r++; - *cs++ = MI_STORE_DWORD_IMM; - *cs++ = addr; - *cs++ = addr >> 32; - *cs++ = new_seqno(wrk, engine); - offset += 4 * sizeof(uint32_t); - - /* When we are busy, we can just reuse the last set of timings. - * If we have been idle for a while, we want to resample the - * latency on each engine (to measure external load). 
- */ - if (flags & INIT_CLOCKS) { - r->offset = offset + sizeof(uint32_t); - r->delta = SEQNO_OFFSET(engine) + sizeof(uint32_t); - r->presumed_offset = presumed_offset; - addr = presumed_offset + r->delta; - r++; - *cs++ = MI_STORE_DWORD_IMM; - *cs++ = addr; - *cs++ = addr >> 32; - *cs++ = *REG(RCS_TIMESTAMP); - offset += 4 * sizeof(uint32_t); - - r->offset = offset + 2 * sizeof(uint32_t); - r->delta = SEQNO_OFFSET(engine) + 2*sizeof(uint32_t); - r->presumed_offset = presumed_offset; - addr = presumed_offset + r->delta; - r++; - *cs++ = 0x24 << 23 | 2; /* MI_STORE_REG_MEM */ - *cs++ = RCS_TIMESTAMP; - *cs++ = addr; - *cs++ = addr >> 32; - offset += 4 * sizeof(uint32_t); - } - - r->offset = offset + sizeof(uint32_t); - r->delta = SEQNO_OFFSET(engine) + 3*sizeof(uint32_t); - r->presumed_offset = presumed_offset; - addr = presumed_offset + r->delta; - r++; - *cs++ = MI_STORE_DWORD_IMM; - *cs++ = addr; - *cs++ = addr >> 32; - *cs++ = current_seqno(wrk, engine); - offset += 4 * sizeof(uint32_t); - - *cs++ = MI_BATCH_BUFFER_END; - - eb_set_engine(&eb, engine, wrk->flags); - eb.flags |= I915_EXEC_HANDLE_LUT; - eb.flags |= I915_EXEC_NO_RELOC; - - eb.batch_start_offset = 128 * engine; - - gem_execbuf(fd, &eb); - } -} - static void -do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine, - unsigned int flags) +do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine) { - uint32_t seqno = new_seqno(wrk, engine); unsigned int i; - eb_update_flags(wrk, w, engine, flags); - - if (flags & SEQNO) - update_bb_seqno(w, engine, seqno); - if (flags & RT) - update_bb_rt(w, engine, seqno); - + eb_update_flags(wrk, w, engine); update_bb_start(w); w->eb.batch_start_offset = @@ -2758,9 +1724,8 @@ do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine, } } -static bool sync_deps(struct workload *wrk, struct w_step *w) +static void sync_deps(struct workload *wrk, struct w_step *w) { - bool synced = false; unsigned int i; for (i = 0; i < 
w->data_deps.nr; i++) { @@ -2777,11 +1742,7 @@ static bool sync_deps(struct workload *wrk, struct w_step *w) igt_assert(wrk->steps[dep_idx].type == BATCH); gem_sync(fd, wrk->steps[dep_idx].obj[0].handle); - - synced = true; } - - return synced; } static void *run_workload(void *data) @@ -2789,7 +1750,6 @@ static void *run_workload(void *data) struct workload *wrk = (struct workload *)data; struct timespec t_start, t_end; struct w_step *w; - bool last_sync = false; int throttle = -1; int qd_throttle = -1; int count; @@ -2797,7 +1757,6 @@ static void *run_workload(void *data) clock_gettime(CLOCK_MONOTONIC, &t_start); - init_status_page(wrk, INIT_ALL); for (count = 0; wrk->run && (wrk->background || count < wrk->repeat); count++) { unsigned int cur_seqno = wrk->sync_seqno; @@ -2898,26 +1857,13 @@ static void *run_workload(void *data) igt_assert(w->type == BATCH); - if ((wrk->flags & DEPSYNC) && engine == VCS) - last_sync = sync_deps(wrk, w); - - if (last_sync && (wrk->flags & HEARTBEAT)) - init_status_page(wrk, 0); - - last_sync = false; - - wrk->nr_bb[engine]++; - if (engine == VCS && wrk->balancer && - wrk->balancer->balance) { - engine = wrk->balancer->balance(wrk->balancer, - wrk, w); - wrk->nr_bb[engine]++; - } + if (wrk->flags & DEPSYNC) + sync_deps(wrk, w); if (throttle > 0) w_sync_to(wrk, w, i - throttle); - do_eb(wrk, w, engine, wrk->flags); + do_eb(wrk, w, engine); if (w->request != -1) { igt_list_del(&w->rq_link); @@ -2930,10 +1876,8 @@ static void *run_workload(void *data) if (!wrk->run) break; - if (w->sync) { + if (w->sync) gem_sync(fd, w->obj[0].handle); - last_sync = true; - } if (qd_throttle > 0) { while (wrk->nrequest[engine] > qd_throttle) { @@ -2943,7 +1887,6 @@ static void *run_workload(void *data) s, rq_link); gem_sync(fd, s->obj[0].handle); - last_sync = true; s->request = -1; igt_list_del(&s->rq_link); @@ -2986,13 +1929,6 @@ static void *run_workload(void *data) printf("%c%u: %.3fs elapsed (%d cycles, %.3f workloads/s).", wrk->background ? 
' ' : '*', wrk->id, t, count, count / t); - if (wrk->balancer) - printf(" %lu (%lu + %lu) total VCS batches.", - wrk->nr_bb[VCS], wrk->nr_bb[VCS1], wrk->nr_bb[VCS2]); - if (wrk->balancer && wrk->balancer->get_qd) - printf(" Average queue depths %.3f, %.3f.", - (double)wrk->qd_sum[VCS1] / wrk->nr_bb[VCS], - (double)wrk->qd_sum[VCS2] / wrk->nr_bb[VCS]); putchar('\n'); } @@ -3114,8 +2050,6 @@ calibrate_engines(void) static void print_help(void) { - unsigned int i; - puts( "Usage: gem_wsim [OPTIONS]\n" "\n" @@ -3145,32 +2079,11 @@ static void print_help(void) " -a <desc|path> Append a workload to all other workloads.\n" " -r <n> How many times to emit the workload.\n" " -c <n> Fork N clients emitting the workload simultaneously.\n" -" -x Swap VCS1 and VCS2 engines in every other client.\n" -" -b <n> Load balancing to use.\n" -" Available load balancers are:" - ); - - for (i = 0; i < ARRAY_SIZE(all_balancers); i++) { - igt_assert(all_balancers[i].desc); - printf( -" %s (%u): %s\n", - all_balancers[i].name, all_balancers[i].id, - all_balancers[i].desc); - } - puts( -" Balancers can be specified either as names or as their id\n" -" number as listed above.\n" -" -2 Remap VCS2 to BCS.\n" -" -R Round-robin initial VCS assignment per client.\n" -" -H Send heartbeat on synchronisation points with seqno based\n" -" balancers. Gives better engine busyness view in some cases.\n" -" -s Turn on small SSEU config for the next workload on the\n" -" command line. Subsequent -s switches it off.\n" -" -S Synchronize the sequence of random batch durations between\n" -" clients.\n" -" -G Global load balancing - a single load balancer will be shared\n" -" between all clients and there will be a single seqno domain.\n" -" -d Sync between data dependencies in userspace." +" -s Turn on small SSEU config for the next workload on the\n" +" command line. 
Subsequent -s switches it off.\n" +" -S Synchronize the sequence of random batch durations between\n" +" clients.\n" +" -d Sync between data dependencies in userspace." ); } @@ -3218,62 +2131,6 @@ add_workload_arg(struct w_arg *w_args, unsigned int nr_args, char *w_arg, return w_args; } -static int find_balancer_by_name(char *name) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(all_balancers); i++) { - if (!strcasecmp(name, all_balancers[i].name)) - return all_balancers[i].id; - } - - return -1; -} - -static const struct workload_balancer *find_balancer_by_id(unsigned int id) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(all_balancers); i++) { - if (id == all_balancers[i].id) - return &all_balancers[i]; - } - - return NULL; -} - -static void init_clocks(void) -{ - struct timespec t_start, t_end; - uint32_t rcs_start, rcs_end; - double overhead, t; - - if (verbose <= 1) - return; - - clock_gettime(CLOCK_MONOTONIC, &t_start); - for (int i = 0; i < 100; i++) - rcs_start = *REG(RCS_TIMESTAMP); - clock_gettime(CLOCK_MONOTONIC, &t_end); - overhead = 2 * elapsed(&t_start, &t_end) / 100; - - clock_gettime(CLOCK_MONOTONIC, &t_start); - for (int i = 0; i < 100; i++) - clock_gettime(CLOCK_MONOTONIC, &t_end); - clock_gettime(CLOCK_MONOTONIC, &t_end); - overhead += elapsed(&t_start, &t_end) / 100; - - clock_gettime(CLOCK_MONOTONIC, &t_start); - rcs_start = *REG(RCS_TIMESTAMP); - usleep(100); - rcs_end = *REG(RCS_TIMESTAMP); - clock_gettime(CLOCK_MONOTONIC, &t_end); - - t = elapsed(&t_start, &t_end) - overhead; - printf("%d cycles in %.1fus, i.e. 
1024 cycles takes %1.fus\n", - rcs_end - rcs_start, 1e6*t, 1024e6 * t / (rcs_end - rcs_start)); -} - int main(int argc, char **argv) { unsigned int repeat = 1; @@ -3287,9 +2144,7 @@ int main(int argc, char **argv) char *append_workload_arg = NULL; struct w_arg *w_args = NULL; unsigned int tolerance_pct = 1; - const struct workload_balancer *balancer = NULL; int exitcode = EXIT_FAILURE; - char *endptr = NULL; int prio = 0; double t; int i, c; @@ -3304,17 +2159,13 @@ int main(int argc, char **argv) * This minimizes the gap in engine utilization tracking when observed * via external tools like trace.pl. */ - fd = __drm_open_driver(DRIVER_INTEL); + fd = __drm_open_driver_render(DRIVER_INTEL); igt_require(fd); - intel_register_access_init(&mmio_data, intel_get_pci_device(), false, fd); - - init_clocks(); - master_prng = time(NULL); while ((c = getopt(argc, argv, - "Thqv2RsSHxGdc:n:r:w:W:a:t:b:p:I:")) != -1) { + "ThqvsSdc:n:r:w:W:a:t:p:I:")) != -1) { switch (c) { case 'W': if (master_workload >= 0) { @@ -3413,52 +2264,15 @@ int main(int argc, char **argv) case 'v': verbose++; break; - case 'x': - flags |= SWAPVCS; - break; - case '2': - flags |= VCS2REMAP; - break; - case 'R': - flags |= INITVCSRR; - break; case 'S': flags |= SYNCEDCLIENTS; break; case 's': flags ^= SSEU; break; - case 'H': - flags |= HEARTBEAT; - break; - case 'G': - flags |= GLOBAL_BALANCE; - break; case 'd': flags |= DEPSYNC; break; - case 'b': - i = find_balancer_by_name(optarg); - if (i < 0) { - i = strtol(optarg, &endptr, 0); - if (endptr && *endptr) - i = -1; - } - - if (i >= 0) { - balancer = find_balancer_by_id(i); - if (balancer) { - igt_assert(intel_gen(intel_get_drm_devid(fd)) >= balancer->min_gen); - flags |= BALANCE | balancer->flags; - } - } - - if (!balancer) { - wsim_err("Unknown balancing mode '%s'!\n", - optarg); - goto err; - } - break; case 'I': master_prng = strtol(optarg, NULL, 0); break; @@ -3470,16 +2284,6 @@ int main(int argc, char **argv) } } - if ((flags & HEARTBEAT) && 
!(flags & SEQNO)) { - wsim_err("Heartbeat needs a seqno based balancer!\n"); - goto err; - } - - if ((flags & VCS2REMAP) && (flags & I915)) { - wsim_err("VCS remapping not supported with i915 balancing!\n"); - goto err; - } - if (!has_nop_calibration) { if (verbose > 1) { printf("Calibrating nop delays with %u%% tolerance...\n", @@ -3519,11 +2323,6 @@ int main(int argc, char **argv) goto err; } - if ((flags & GLOBAL_BALANCE) && !balancer) { - wsim_err("Balancer not specified in global balancing mode!\n"); - goto err; - } - if (append_workload_arg) { append_workload_arg = load_workload_descriptor(append_workload_arg); if (!append_workload_arg) { @@ -3566,19 +2365,6 @@ int main(int argc, char **argv) printf("Random seed is %u.\n", master_prng); print_engine_calibrations(); printf("%u client%s.\n", clients, clients > 1 ? "s" : ""); - if (flags & SWAPVCS) - printf("Swapping VCS rings between clients.\n"); - if (flags & GLOBAL_BALANCE) { - if (flags & I915) { - printf("Ignoring global balancing with i915!\n"); - flags &= ~GLOBAL_BALANCE; - } else { - printf("Using %s balancer in global mode.\n", - balancer->name); - } - } else if (balancer) { - printf("Using %s balancer.\n", balancer->name); - } } srand(master_prng); @@ -3591,41 +2377,18 @@ int main(int argc, char **argv) igt_assert(w); for (i = 0; i < clients; i++) { - unsigned int flags_ = flags; - w[i] = clone_workload(wrk[nr_w_args > 1 ? 
i : 0]); - if (flags & SWAPVCS && i & 1) - flags_ &= ~SWAPVCS; - - if ((flags & GLOBAL_BALANCE) && !(flags & I915)) { - w[i]->balancer = &global_balancer; - w[i]->global_wrk = w[0]; - w[i]->global_balancer = balancer; - } else { - w[i]->balancer = balancer; - } - w[i]->flags = flags; w[i]->repeat = repeat; w[i]->background = master_workload >= 0 && i != master_workload; w[i]->print_stats = verbose > 1 || (verbose > 0 && master_workload == i); - if (prepare_workload(i, w[i], flags_)) { + if (prepare_workload(i, w[i])) { wsim_err("Failed to prepare workload %u!\n", i); goto err; } - - - if (balancer && balancer->init) { - int ret = balancer->init(balancer, w[i]); - if (ret) { - wsim_err("Failed to initialize balancing! (%u=%d)\n", - i, ret); - goto err; - } - } } clock_gettime(CLOCK_MONOTONIC, &t_start); @@ -3670,6 +2433,5 @@ int main(int argc, char **argv) out: exitcode = EXIT_SUCCESS; err: - intel_register_access_fini(&mmio_data); return exitcode; } diff --git a/benchmarks/ilog2.h b/benchmarks/ilog2.h deleted file mode 100644 index 596d7c23..00000000 --- a/benchmarks/ilog2.h +++ /dev/null @@ -1,104 +0,0 @@ -#ifndef ILOG2_H -#define ILOG2_H - -#include <stdint.h> - -static inline int fls(int x) -{ - int r = -1; - asm("bsrl %1,%0" : "=r" (r) : "rm" (x), "0" (-1)); - return r + 1; -} - -static inline int fls64(__u64 x) -{ - int r = -1; - asm("bsrq %1,%q0" : "+r" (r) : "rm" (x)); - return r + 1; -} - -static inline __attribute__((const)) -int __ilog2_u32(uint32_t n) -{ - return fls(n) - 1; -} - -static inline __attribute__((const)) -int __ilog2_u64(uint64_t n) -{ - return fls64(n) - 1; -} - -#define ilog2(n) \ -( \ - __builtin_constant_p(n) ? ( \ - (n) < 2 ? 0 : \ - (n) & (1ULL << 63) ? 63 : \ - (n) & (1ULL << 62) ? 62 : \ - (n) & (1ULL << 61) ? 61 : \ - (n) & (1ULL << 60) ? 60 : \ - (n) & (1ULL << 59) ? 59 : \ - (n) & (1ULL << 58) ? 58 : \ - (n) & (1ULL << 57) ? 57 : \ - (n) & (1ULL << 56) ? 56 : \ - (n) & (1ULL << 55) ? 55 : \ - (n) & (1ULL << 54) ? 
54 : \ - (n) & (1ULL << 53) ? 53 : \ - (n) & (1ULL << 52) ? 52 : \ - (n) & (1ULL << 51) ? 51 : \ - (n) & (1ULL << 50) ? 50 : \ - (n) & (1ULL << 49) ? 49 : \ - (n) & (1ULL << 48) ? 48 : \ - (n) & (1ULL << 47) ? 47 : \ - (n) & (1ULL << 46) ? 46 : \ - (n) & (1ULL << 45) ? 45 : \ - (n) & (1ULL << 44) ? 44 : \ - (n) & (1ULL << 43) ? 43 : \ - (n) & (1ULL << 42) ? 42 : \ - (n) & (1ULL << 41) ? 41 : \ - (n) & (1ULL << 40) ? 40 : \ - (n) & (1ULL << 39) ? 39 : \ - (n) & (1ULL << 38) ? 38 : \ - (n) & (1ULL << 37) ? 37 : \ - (n) & (1ULL << 36) ? 36 : \ - (n) & (1ULL << 35) ? 35 : \ - (n) & (1ULL << 34) ? 34 : \ - (n) & (1ULL << 33) ? 33 : \ - (n) & (1ULL << 32) ? 32 : \ - (n) & (1ULL << 31) ? 31 : \ - (n) & (1ULL << 30) ? 30 : \ - (n) & (1ULL << 29) ? 29 : \ - (n) & (1ULL << 28) ? 28 : \ - (n) & (1ULL << 27) ? 27 : \ - (n) & (1ULL << 26) ? 26 : \ - (n) & (1ULL << 25) ? 25 : \ - (n) & (1ULL << 24) ? 24 : \ - (n) & (1ULL << 23) ? 23 : \ - (n) & (1ULL << 22) ? 22 : \ - (n) & (1ULL << 21) ? 21 : \ - (n) & (1ULL << 20) ? 20 : \ - (n) & (1ULL << 19) ? 19 : \ - (n) & (1ULL << 18) ? 18 : \ - (n) & (1ULL << 17) ? 17 : \ - (n) & (1ULL << 16) ? 16 : \ - (n) & (1ULL << 15) ? 15 : \ - (n) & (1ULL << 14) ? 14 : \ - (n) & (1ULL << 13) ? 13 : \ - (n) & (1ULL << 12) ? 12 : \ - (n) & (1ULL << 11) ? 11 : \ - (n) & (1ULL << 10) ? 10 : \ - (n) & (1ULL << 9) ? 9 : \ - (n) & (1ULL << 8) ? 8 : \ - (n) & (1ULL << 7) ? 7 : \ - (n) & (1ULL << 6) ? 6 : \ - (n) & (1ULL << 5) ? 5 : \ - (n) & (1ULL << 4) ? 4 : \ - (n) & (1ULL << 3) ? 3 : \ - (n) & (1ULL << 2) ? 2 : \ - 1 ) : \ - (sizeof(n) <= 4) ? 
\ - __ilog2_u32(n) : \ - __ilog2_u64(n) \ - ) - -#endif /* ILOG2_H */ diff --git a/benchmarks/meson.build b/benchmarks/meson.build index ef93193b..c70e1aac 100644 --- a/benchmarks/meson.build +++ b/benchmarks/meson.build @@ -11,6 +11,7 @@ benchmark_progs = [ 'gem_prw', 'gem_set_domain', 'gem_syslatency', + 'gem_wsim', 'kms_vblank', 'prime_lookup', 'vgem_mmap', @@ -34,8 +35,3 @@ foreach prog : benchmark_progs install_dir : benchmarksdir, dependencies : igt_deps) endforeach - -executable('gem_wsim', 'gem_wsim.c', - install : true, - install_dir : benchmarksdir, - dependencies : igt_deps + [ lib_igt_perf ]) diff --git a/benchmarks/wsim/media-1080p-player.wsim b/benchmarks/wsim/media-1080p-player.wsim index bcbb0cfd..c87e1aee 100644 --- a/benchmarks/wsim/media-1080p-player.wsim +++ b/benchmarks/wsim/media-1080p-player.wsim @@ -1,3 +1,5 @@ +M.1.VCS +B.1 1.VCS.5000-10000.0.0 2.RCS.1000-2000.-1.0 P.3.1 diff --git a/benchmarks/wsim/media_1n2_480p.wsim b/benchmarks/wsim/media_1n2_480p.wsim index 11a4da6b..3ce15ebc 100644 --- a/benchmarks/wsim/media_1n2_480p.wsim +++ b/benchmarks/wsim/media_1n2_480p.wsim @@ -1,9 +1,15 @@ -1.VCS.12000-15000.0.0 +M.10.VCS +B.10 +M.11.VCS +B.11 +M.12.VCS +B.12 +10.VCS.12000-15000.0.0 2.RCS.1000-2200.-1.0 3.RCS.1000-1400.-1.0 3.RCS.10000-12000.0.0 -3.VCS.2500-3500.-1.0 +11.VCS.2500-3500.-1.0 4.RCS.1000-2200.-5.0 5.RCS.1000-1400.-1.0 5.RCS.10000-12000.0.0 -5.VCS.2500-3500.-1.1 +12.VCS.2500-3500.-1.1 diff --git a/benchmarks/wsim/media_1n2_asy.wsim b/benchmarks/wsim/media_1n2_asy.wsim index 58c99ca1..f9943eb6 100644 --- a/benchmarks/wsim/media_1n2_asy.wsim +++ b/benchmarks/wsim/media_1n2_asy.wsim @@ -1,9 +1,11 @@ -1.VCS.12000-15000.0.0 +M.10.VCS +B.10 +10.VCS.12000-15000.0.0 2.RCS.1000-2200.-1.0 3.RCS.1000-1400.-1.0 3.RCS.10000-12000.0.0 -3.VCS.2500-3500.-1.0 +11.VCS.2500-3500.-1.0 4.RCS.400-800.-5.0 5.RCS.500-700.-1.0 5.RCS.5000-6000.0.0 -5.VCS.1200-1500.-1.1 +12.VCS.1200-1500.-1.1 diff --git a/benchmarks/wsim/media_1n3_480p.wsim 
b/benchmarks/wsim/media_1n3_480p.wsim index c724ab28..4f585fa8 100644 --- a/benchmarks/wsim/media_1n3_480p.wsim +++ b/benchmarks/wsim/media_1n3_480p.wsim @@ -1,13 +1,21 @@ -1.VCS.12000-15000.0.0 +M.10.VCS +B.10 +M.11.VCS +B.11 +M.12.VCS +B.12 +M.13.VCS +B.13 +10.VCS.12000-15000.0.0 2.RCS.1000-2200.-1.0 3.RCS.1000-1400.-1.0 3.RCS.10000-12000.0.0 -3.VCS.2500-3500.-1.0 +11.VCS.2500-3500.-1.0 4.RCS.1000-2200.-5.0 5.RCS.1000-1400.-1.0 5.RCS.10000-12000.0.0 -5.VCS.2500-3500.-1.0 +12.VCS.2500-3500.-1.0 6.RCS.1000-2200.-9.0 7.RCS.1000-1400.-1.0 7.RCS.10000-12000.0.0 -7.VCS.2500-3500.-1.1 +13.VCS.2500-3500.-1.1 diff --git a/benchmarks/wsim/media_1n3_asy.wsim b/benchmarks/wsim/media_1n3_asy.wsim index c7588328..dce7789e 100644 --- a/benchmarks/wsim/media_1n3_asy.wsim +++ b/benchmarks/wsim/media_1n3_asy.wsim @@ -1,3 +1,11 @@ +M.10.VCS +B.10 +M.11.VCS +B.11 +M.12.VCS +B.12 +M.13.VCS +B.13 1.VCS.12000-15000.0.0 2.RCS.1000-2200.-1.0 3.RCS.1000-1400.-1.0 diff --git a/benchmarks/wsim/media_1n4_480p.wsim b/benchmarks/wsim/media_1n4_480p.wsim index e67fefc3..06fa9ade 100644 --- a/benchmarks/wsim/media_1n4_480p.wsim +++ b/benchmarks/wsim/media_1n4_480p.wsim @@ -1,17 +1,27 @@ -1.VCS.12000-15000.0.0 +M.10.VCS +B.10 +M.11.VCS +B.11 +M.12.VCS +B.12 +M.13.VCS +B.13 +M.14.VCS +B.14 +10.VCS.12000-15000.0.0 2.RCS.1000-2200.-1.0 3.RCS.1000-1400.-1.0 3.RCS.10000-12000.0.0 -3.VCS.2500-3500.-1.0 +11.VCS.2500-3500.-1.0 4.RCS.1000-2200.-5.0 5.RCS.1000-1400.-1.0 5.RCS.10000-12000.0.0 -5.VCS.2500-3500.-1.0 +12.VCS.2500-3500.-1.0 6.RCS.1000-2200.-9.0 7.RCS.1000-1400.-1.0 7.RCS.10000-12000.0.0 -7.VCS.2500-3500.-1.0 +13.VCS.2500-3500.-1.0 8.RCS.1000-2200.-13.0 9.RCS.1000-1400.-1.0 9.RCS.10000-12000.0.0 -9.VCS.2500-3500.-1.1 +14.VCS.2500-3500.-1.1 diff --git a/benchmarks/wsim/media_1n4_asy.wsim b/benchmarks/wsim/media_1n4_asy.wsim index ede4fd7a..6dc6b652 100644 --- a/benchmarks/wsim/media_1n4_asy.wsim +++ b/benchmarks/wsim/media_1n4_asy.wsim @@ -1,3 +1,13 @@ +M.10.VCS +B.10 +M.11.VCS +B.11 +M.12.VCS 
+B.12 +M.13.VCS +B.13 +M.14.VCS +B.14 1.VCS.12000-15000.0.0 2.RCS.1000-2200.-1.0 3.RCS.1000-1400.-1.0 diff --git a/benchmarks/wsim/media_1n5_480p.wsim b/benchmarks/wsim/media_1n5_480p.wsim index 9e43b984..3467a386 100644 --- a/benchmarks/wsim/media_1n5_480p.wsim +++ b/benchmarks/wsim/media_1n5_480p.wsim @@ -1,21 +1,33 @@ -1.VCS.12000-15000.0.0 +M.10.VCS +B.10 +M.11.VCS +B.11 +M.12.VCS +B.12 +M.13.VCS +B.13 +M.14.VCS +B.14 +M.15.VCS +B.15 +10.VCS.12000-15000.0.0 2.RCS.1000-2200.-1.0 3.RCS.1000-1400.-1.0 3.RCS.10000-12000.0.0 -3.VCS.2500-3500.-1.0 +11.VCS.2500-3500.-1.0 4.RCS.1000-2200.-5.0 5.RCS.1000-1400.-1.0 5.RCS.10000-12000.0.0 -5.VCS.2500-3500.-1.0 +12.VCS.2500-3500.-1.0 6.RCS.1000-2200.-9.0 7.RCS.1000-1400.-1.0 7.RCS.10000-12000.0.0 -7.VCS.2500-3500.-1.0 +13.VCS.2500-3500.-1.0 8.RCS.1000-2200.-13.0 9.RCS.1000-1400.-1.0 9.RCS.10000-12000.0.0 -9.VCS.2500-3500.-1.0 +14.VCS.2500-3500.-1.0 10.RCS.1000-2200.-17.0 11.RCS.1000-1400.-1.0 11.RCS.10000-12000.0.0 -11.VCS.2500-3500.-1.1 +15.VCS.2500-3500.-1.1 diff --git a/benchmarks/wsim/media_1n5_asy.wsim b/benchmarks/wsim/media_1n5_asy.wsim index 78bb4a86..4b205457 100644 --- a/benchmarks/wsim/media_1n5_asy.wsim +++ b/benchmarks/wsim/media_1n5_asy.wsim @@ -1,3 +1,15 @@ +M.10.VCS +B.10 +M.11.VCS +B.11 +M.12.VCS +B.12 +M.13.VCS +B.13 +M.14.VCS +B.14 +M.15.VCS +B.15 1.VCS.12000-15000.0.0 2.RCS.2000-3000.-1.0 3.RCS.500-900.-1.0 diff --git a/benchmarks/wsim/media_load_balance_17i7.wsim b/benchmarks/wsim/media_load_balance_17i7.wsim index 0830a323..bcb1ab2f 100644 --- a/benchmarks/wsim/media_load_balance_17i7.wsim +++ b/benchmarks/wsim/media_load_balance_17i7.wsim @@ -1,7 +1,9 @@ +M.1.VCS +B.1 1.VCS.2800-3200.0.1 -1.RCS.900-1100.-1.0 -1.RCS.3600-3800.0.0 -1.RCS.900-1100.-2.0 +2.RCS.900-1100.-1.0 +2.RCS.3600-3800.0.0 +2.RCS.900-1100.-2.0 1.VCS.2200-2400.-2.0 -1.RCS.4500-4900.-1.0 +2.RCS.4500-4900.-1.0 1.VCS.500-700.-1.1 diff --git a/benchmarks/wsim/media_load_balance_19.wsim b/benchmarks/wsim/media_load_balance_19.wsim index 
03890776..88cd34fb 100644 --- a/benchmarks/wsim/media_load_balance_19.wsim +++ b/benchmarks/wsim/media_load_balance_19.wsim @@ -1,3 +1,5 @@ +M.1.VCS +B.1 0.VECS.1400-1500.0.0 0.RCS.1000-1500.-1.0 s.-2 @@ -5,6 +7,6 @@ s.-2 1.VCS.1300-1400.0.1 0.VECS.1400-1500.0.0 0.RCS.100-300.-1.1 -1.RCS.1300-1500.0.0 +2.RCS.1300-1500.-3.0 1.VCS.100-300.-1.1 1.VCS.900-1400.0.1 diff --git a/benchmarks/wsim/media_load_balance_4k12u7.wsim b/benchmarks/wsim/media_load_balance_4k12u7.wsim index ff10425b..a417bb18 100644 --- a/benchmarks/wsim/media_load_balance_4k12u7.wsim +++ b/benchmarks/wsim/media_load_balance_4k12u7.wsim @@ -1,3 +1,5 @@ +M.1.VCS +B.1 1.VCS.4000-6000.0.0 2.RCS.400-800.-1.0 3.RCS.1900-2200.-1.0 diff --git a/benchmarks/wsim/media_load_balance_fhd26u7.wsim b/benchmarks/wsim/media_load_balance_fhd26u7.wsim index 56114ddc..4c8225e1 100644 --- a/benchmarks/wsim/media_load_balance_fhd26u7.wsim +++ b/benchmarks/wsim/media_load_balance_fhd26u7.wsim @@ -1,25 +1,27 @@ +M.3.VCS +B.3 1.VCS1.1200-1800.0.0 1.VCS1.1900-2100.0.0 2.RCS.1500-2000.-1.0 -2.VCS.1400-1800.-1.1 +3.VCS.1400-1800.-1.1 1.VCS1.1900-2100.-1.0 2.RCS.1500-2000.-1.0 -2.VCS.1400-1800.-1.1 +3.VCS.1400-1800.-1.1 1.VCS1.1900-2100.-1.0 2.RCS.200-400.-1.0 2.RCS.1500-2000.0.0 -2.VCS.1400-1800.-1.1 +3.VCS.1400-1800.-1.1 1.VCS1.1900-2100.-1.0 2.RCS.1500-2000.-1.0 -2.VCS.1400-1800.-1.1 +3.VCS.1400-1800.-1.1 1.VCS1.1900-2100.-1.0 2.RCS.200-400.-1.0 2.RCS.1500-2000.0.0 -2.VCS.1400-1800.-1.1 +3.VCS.1400-1800.-1.1 1.VCS1.1900-2100.-1.0 2.RCS.1500-2000.-1.0 -2.VCS.1400-1800.-1.1 +3.VCS.1400-1800.-1.1 1.VCS1.1900-2100.-1.0 2.RCS.1500-2000.-1.0 2.RCS.1500-2000.0.0 -2.VCS.1400-1800.-1.1 +3.VCS.1400-1800.-1.1 diff --git a/benchmarks/wsim/media_load_balance_hd01.wsim b/benchmarks/wsim/media_load_balance_hd01.wsim index 86293152..8e7e9d90 100644 --- a/benchmarks/wsim/media_load_balance_hd01.wsim +++ b/benchmarks/wsim/media_load_balance_hd01.wsim @@ -1,23 +1,27 @@ +M.1.VCS +B.1 +M.3.VCS +B.3 1.VCS.1400-1900.0.0 -1.RCS.1200-1600.-1.0 
-1.RCS.1000-1400.-1.0 -2.VCS.800-1000.-1.0 +2.RCS.1200-1600.-1.0 +2.RCS.1000-1400.-1.0 +3.VCS.800-1000.-1.0 1.VCS.1400-1900.-4.0 -1.RCS.1200-1600.-1.0 -1.RCS.1000-1400.-1.0 -2.VCS.800-1000.-1.0 +2.RCS.1200-1600.-1.0 +2.RCS.1000-1400.-1.0 +3.VCS.800-1000.-1.0 1.VCS.1400-1900.-4.0 -1.RCS.1200-1600.-1.0 -1.RCS.1000-1400.-1.0 -2.VCS.800-1000.-1.0 +2.RCS.1200-1600.-1.0 +2.RCS.1000-1400.-1.0 +3.VCS.800-1000.-1.0 1.VCS.1400-1900.-4.0 -1.RCS.1200-1600.-1.0 -1.RCS.1000-1400.-1.0 -2.VCS.800-1000.-1.0 +2.RCS.1200-1600.-1.0 +2.RCS.1000-1400.-1.0 +3.VCS.800-1000.-1.0 1.VCS.1400-1900.-4.0 -1.RCS.1200-1600.-1.0 -1.RCS.1000-1400.-1.0 -2.VCS.800-1000.-1.0 +2.RCS.1200-1600.-1.0 +2.RCS.1000-1400.-1.0 +3.VCS.800-1000.-1.0 s.-17 s.-14 s.-11 diff --git a/benchmarks/wsim/media_load_balance_hd06mp2.wsim b/benchmarks/wsim/media_load_balance_hd06mp2.wsim index 1e1fc003..cfe98501 100644 --- a/benchmarks/wsim/media_load_balance_hd06mp2.wsim +++ b/benchmarks/wsim/media_load_balance_hd06mp2.wsim @@ -1,4 +1,8 @@ +M.1.VCS +B.1 +M.4.VCS +B.4 1.VCS.900-1700.0.0 2.RCS.100-400.-1.0 3.RCS.800-900.-1.0 -3.VCS.100-200.-1.1 +4.VCS.100-200.-1.1 diff --git a/benchmarks/wsim/media_load_balance_hd12.wsim b/benchmarks/wsim/media_load_balance_hd12.wsim index 8f3b41ca..684e6b51 100644 --- a/benchmarks/wsim/media_load_balance_hd12.wsim +++ b/benchmarks/wsim/media_load_balance_hd12.wsim @@ -1,4 +1,8 @@ +M.1.VCS +B.1 +M.4.VCS +B.4 1.VCS.850-1300.0.0 2.RCS.50-250.-1.0 3.RCS.400-800.-1.0 -3.VCS.100-200.-1.1 +4.VCS.100-200.-1.1 diff --git a/benchmarks/wsim/media_load_balance_hd17i4.wsim b/benchmarks/wsim/media_load_balance_hd17i4.wsim index b6195b60..1430f18d 100644 --- a/benchmarks/wsim/media_load_balance_hd17i4.wsim +++ b/benchmarks/wsim/media_load_balance_hd17i4.wsim @@ -1,7 +1,11 @@ +M.1.VCS +B.1 +M.3.VCS +B.3 1.VCS.900-1400.0.0 2.RCS.200-300.-1.0 2.RCS.1000-2000.0.0 2.RCS.1000-2000.0.0 -2.VCS.800-1000.-1.0 -1.RCS.2800-3100.-1.0 +3.VCS.800-1000.-1.0 +4.RCS.2800-3100.-1.0 1.VCS.800-1000.-1.1 diff --git 
a/benchmarks/wsim/media_mfe2_480p.wsim b/benchmarks/wsim/media_mfe2_480p.wsim index 18bc756f..00ef5c3a 100644 --- a/benchmarks/wsim/media_mfe2_480p.wsim +++ b/benchmarks/wsim/media_mfe2_480p.wsim @@ -1,3 +1,11 @@ +M.1.VCS +B.1 +M.4.VCS +B.4 +M.7.VCS +B.7 +M.8.VCS +B.8 1.VCS.12000-15000.0.0 2.RCS.1000-2200.-1.0 3.RCS.800-1600.-1.0 @@ -5,5 +13,5 @@ 5.RCS.1000-2200.-1.0 6.RCS.800-1600.-1.0 6.RCS.10000-12000.-4.0 -6.VCS.2500-3500.-1.0 -3.VCS.2500-3500.-2.1 +7.VCS.2500-3500.-1.0 +8.VCS.2500-3500.-2.1 diff --git a/benchmarks/wsim/media_mfe3_480p.wsim b/benchmarks/wsim/media_mfe3_480p.wsim index e12a2e6a..3ac4db0e 100644 --- a/benchmarks/wsim/media_mfe3_480p.wsim +++ b/benchmarks/wsim/media_mfe3_480p.wsim @@ -1,3 +1,15 @@ +M.1.VCS +B.1 +M.4.VCS +B.4 +M.7.VCS +B.7 +M.10.VCS +B.10 +M.11.VCS +B.11 +M.12.VCS +B.12 1.VCS.12000-15000.0.0 2.RCS.1000-2200.-1.0 3.RCS.800-1600.-1.0 @@ -8,6 +20,6 @@ 8.RCS.1000-2200.-1.0 9.RCS.800-1600.-1.0 9.RCS.10000-12000.-7/-4.0 -9.VCS.2500-3500.-1.0 -3.VCS.2500-3500.-2.0 -6.VCS.2500-3500.-3.1 +10.VCS.2500-3500.-1.0 +11.VCS.2500-3500.-2.0 +12.VCS.2500-3500.-3.1 diff --git a/benchmarks/wsim/media_mfe4_480p.wsim b/benchmarks/wsim/media_mfe4_480p.wsim index 75d4f67e..7f683156 100644 --- a/benchmarks/wsim/media_mfe4_480p.wsim +++ b/benchmarks/wsim/media_mfe4_480p.wsim @@ -1,3 +1,19 @@ +M.1.VCS +B.1 +M.4.VCS +B.4 +M.7.VCS +B.7 +M.10.VCS +B.10 +M.13.VCS +B.13 +M.14.VCS +B.14 +M.15.VCS +B.15 +M.16.VCS +B.16 1.VCS.12000-15000.0.0 2.RCS.1000-2200.-1.0 3.RCS.800-1600.-1.0 @@ -11,7 +27,7 @@ 11.RCS.1000-2200.-1.0 12.RCS.800-1600.-1.0 12.RCS.10000-12000.-4/-7/-10.0 -12.VCS.2500-3500.-1.0 -3.VCS.2500-3500.-2.0 -6.VCS.2500-3500.-3.0 -9.VCS.2500-3500.-4.1 +13.VCS.2500-3500.-1.0 +14.VCS.2500-3500.-2.0 +15.VCS.2500-3500.-3.0 +16.VCS.2500-3500.-4.1 diff --git a/benchmarks/wsim/media_nn_1080p.wsim b/benchmarks/wsim/media_nn_1080p.wsim index f9a3ca1b..88c5c772 100644 --- a/benchmarks/wsim/media_nn_1080p.wsim +++ b/benchmarks/wsim/media_nn_1080p.wsim @@ -1,3 +1,7 @@ 
+M.1.VCS +B.1 +M.3.VCS +B.3 1.VCS.13000-17000.0.0 2.RCS.2000-4000.-1.0 3.RCS.3000-5000.-1.0 diff --git a/benchmarks/wsim/media_nn_1080p_s1.wsim b/benchmarks/wsim/media_nn_1080p_s1.wsim index 4fa6ca65..5b47d2a3 100644 --- a/benchmarks/wsim/media_nn_1080p_s1.wsim +++ b/benchmarks/wsim/media_nn_1080p_s1.wsim @@ -1,3 +1,5 @@ +M.4.VCS +B.4 f 1.VCS1.6500-8000.f-1.0 1.VCS2.6500-8000.f-2.0 @@ -5,4 +7,4 @@ a.-3 2.RCS.2000-4000.-2/-3.0 3.RCS.3000-5000.-1.0 3.RCS.23000-27000.0.0 -3.VCS.16000-20000.-1.1 +4.VCS.16000-20000.-1.1 diff --git a/benchmarks/wsim/media_nn_1080p_s2.wsim b/benchmarks/wsim/media_nn_1080p_s2.wsim index 68f0acdf..e3678b39 100644 --- a/benchmarks/wsim/media_nn_1080p_s2.wsim +++ b/benchmarks/wsim/media_nn_1080p_s2.wsim @@ -1,3 +1,5 @@ +M.1.VCS +B.1 1.VCS.13000-17000.0.0 2.RCS.2000-4000.-1.0 3.RCS.3000-5000.-1.0 diff --git a/benchmarks/wsim/media_nn_1080p_s3.wsim b/benchmarks/wsim/media_nn_1080p_s3.wsim index 12368da8..ee3b675d 100644 --- a/benchmarks/wsim/media_nn_1080p_s3.wsim +++ b/benchmarks/wsim/media_nn_1080p_s3.wsim @@ -1,3 +1,5 @@ +M.1.VCS +B.1 1.VCS.13000-17000.0.0 2.RCS.2000-4000.-1.0 3.RCS.3000-5000.-1.0 diff --git a/benchmarks/wsim/media_nn_480p.wsim b/benchmarks/wsim/media_nn_480p.wsim index ab64a456..73fc643d 100644 --- a/benchmarks/wsim/media_nn_480p.wsim +++ b/benchmarks/wsim/media_nn_480p.wsim @@ -1,3 +1,7 @@ +M.1.VCS +B.1 +M.3.VCS +B.3 1.VCS.12000-15000.0.0 2.RCS.1000-2200.-1.0 3.RCS.1000-1400.-1.0 diff --git a/benchmarks/wsim/vcs_balanced.wsim b/benchmarks/wsim/vcs_balanced.wsim index e8958b8f..78d953fb 100644 --- a/benchmarks/wsim/vcs_balanced.wsim +++ b/benchmarks/wsim/vcs_balanced.wsim @@ -1,26 +1,28 @@ q.5 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 
-0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 -0.VCS.500-2000.0.0 +M.1.VCS +B.1 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 +1.VCS.500-2000.0.0 diff --git a/scripts/Makefile.am b/scripts/Makefile.am index e26a39e2..64171529 100644 --- a/scripts/Makefile.am +++ b/scripts/Makefile.am @@ -1,2 +1,2 @@ -dist_noinst_SCRIPTS = intel-gfx-trybot who.sh run-tests.sh trace.pl media-bench.pl +dist_noinst_SCRIPTS = intel-gfx-trybot who.sh run-tests.sh trace.pl noinst_PYTHON = throttle.py diff --git a/scripts/media-bench.pl b/scripts/media-bench.pl deleted file mode 100755 index 1cd8205f..00000000 --- a/scripts/media-bench.pl +++ /dev/null @@ -1,736 +0,0 @@ -#! /usr/bin/perl -# -# Copyright © 2017 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. -# - -use strict; -use warnings; -use 5.010; - -use Getopt::Std; - -chomp(my $igt_root = `pwd -P`); -my $wsim = "$igt_root/benchmarks/gem_wsim"; -my $wrk_root = "$igt_root/benchmarks/wsim"; -my $tracepl = "$igt_root/scripts/trace.pl"; -my $tolerance = 0.01; -my $client_target_s = 10; -my $idle_tolerance_pct = 2.0; -my $verbose = 0; -my $gt2 = 0; -my $show_cmds = 0; -my $realtime_target = 0; -my $wps_target = 0; -my $wps_target_param = 0; -my $multi_mode = 0; -my @multi_workloads; -my $w_direct; -my $balancer; -my $nop; -my %opts; - -my @balancers = ( 'rr', 'rand', 'qd', 'qdr', 'qdavg', 'rt', 'rtr', 'rtavg', - 'context', 'busy', 'busy-avg', 'i915' ); -my %bal_skip_H = ( 'rr' => 1, 'rand' => 1, 'context' => 1, , 'busy' => 1, - 'busy-avg' => 1, 'i915' => 1 ); -my %bal_skip_R = ( 'i915' => 1 ); -my %bal_skip_G = ( 'i915' => 1 ); - -my @workloads = ( - 'media_load_balance_17i7.wsim', - 'media_load_balance_19.wsim', - 'media_load_balance_4k12u7.wsim', - 'media_load_balance_fhd26u7.wsim', - 'media_load_balance_hd01.wsim', - 'media_load_balance_hd06mp2.wsim', - 'media_load_balance_hd12.wsim', - 'media_load_balance_hd17i4.wsim', - 'media_1n2_480p.wsim', - 'media_1n3_480p.wsim', - 'media_1n4_480p.wsim', - 'media_1n5_480p.wsim', - 'media_1n2_asy.wsim', - 'media_1n3_asy.wsim', - 'media_1n4_asy.wsim', - 'media_1n5_asy.wsim', - 'media_mfe2_480p.wsim', - 'media_mfe3_480p.wsim', - 'media_mfe4_480p.wsim', - 'media_nn_1080p.wsim', - 'media_nn_480p.wsim', - ); - -sub show_cmd -{ - my ($cmd) = @_; - - say 
"\n+++ $cmd" if $show_cmds; -} - -sub calibrate_nop -{ - my ($delay, $nop); - my $cmd = "$wsim"; - - show_cmd($cmd); - open WSIM, "$cmd |" or die; - while (<WSIM>) { - chomp; - if (/Nop calibration for (\d+)us delay is (\d+)./) { - $delay = $1; - $nop = $2; - } - - } - close WSIM; - - die unless $nop; - - return $nop -} - -sub can_balance_workload -{ - my ($wrk) = @_; - my $res = 0; - - open WRK, "$wrk_root/$wrk" or die; - while (<WRK>) { - chomp; - if (/\.VCS\./) { - $res = 1; - last; - } - } - close WRK; - - return $res; -} - -sub add_wps_arg -{ - my (@args) = @_; - my $period; - - return @args if $realtime_target <= 0; - - $period = int(1000000 / $realtime_target); - push @args, '-a'; - push @args, 'p.$period'; - - return @args; -} - -sub run_workload -{ - my (@args) = @_; - my ($time, $wps, $cmd); - my @ret; - - @args = add_wps_arg(@args); - push @args, '-2' if $gt2; - - unshift @args, $wsim; - $cmd = join ' ', @args; - show_cmd($cmd); - - open WSIM, "$cmd |" or die; - while (<WSIM>) { - chomp; - if (/^(\d+\.\d+)s elapsed \((\d+\.?\d+) workloads\/s\)$/) { - $time = $1; - $wps = $2; - } elsif (/(\d+)\: \d+\.\d+s elapsed \(\d+ cycles, (\d+\.?\d+) workloads\/s\)/) { - $ret[$1] = $2; - } - } - close WSIM; - - return ($time, $wps, \@ret); -} - -sub dump_cmd -{ - my ($cmd, $file) = @_; - - show_cmd("$cmd > $file"); - - open FOUT, '>', $file or die; - open TIN, "$cmd |" or die; - while (<TIN>) { - print FOUT $_; - } - close TIN; - close FOUT; -} - -sub trace_workload -{ - my ($wrk, $b, $r, $c) = @_; - my @args = ($tracepl, '--trace', $wsim, '-q', '-n', $nop, '-r', $r, '-c', $c); - my $min_batches = 16 + $r * $c / 2; - my @skip_engine; - my %engines; - my ($cmd, $file); - - push @args, '-2' if $gt2; - - unless ($b eq '<none>') { - push @args, '-R'; - push @args, split /\s+/, $b; - } - - if (defined $w_direct) { - push @args, split /\s+/, $wrk; - } else { - push @args, '-w'; - push @args, $wrk_root . '/' . 
$wrk; - } - - show_cmd(join ' ', @args); - if (-e 'perf.data') { - unlink 'perf.data' or die; - } - system(@args) == 0 or die; - - $cmd = "perf script | $tracepl"; - show_cmd($cmd); - open CMD, "$cmd |" or die; - while (<CMD>) { - chomp; - if (/Ring(\S+): (\d+) batches.*?(\d+\.?\d+)% idle,/) { - if ($2 >= $min_batches) { - $engines{$1} = $3; - } else { - push @skip_engine, $1; - } - } elsif (/GPU: (\d+\.?\d+)% idle/) { - $engines{'gpu'} = $1; - } - } - close CMD; - - $wrk =~ s/$wrk_root//g; - $wrk =~ s/\.wsim//g; - $wrk =~ s/-w/W/g; - $wrk =~ s/[ -]/_/g; - $wrk =~ s/\//-/g; - $b =~ s/[ <>]/_/g; - $file = "${wrk}_${b}_-r${r}_-c${c}"; - - dump_cmd('perf script', "${file}.trace"); - - $cmd = "perf script | $tracepl --html -x ctxsave -s -c "; - $cmd .= join ' ', map("-i $_", @skip_engine); - - dump_cmd($cmd, "${file}.html"); - - return \%engines; -} - -sub calibrate_workload -{ - my ($wrk) = @_; - my $tol = $tolerance; - my $loops = 0; - my $error; - my $r; - - $r = $realtime_target > 0 ? $realtime_target * $client_target_s : 23; - for (;;) { - my @args = ('-n', $nop, '-r', $r); - my ($time, $wps); - - if (defined $w_direct) { - push @args, split /\s+/, $wrk; - } else { - push @args, '-w'; - push @args, $wrk_root . '/' . $wrk; - } - - ($time, $wps) = run_workload(@args); - - $wps = $r / $time if $w_direct; - $error = abs($time - $client_target_s) / $client_target_s; - - last if $error <= $tol; - - $r = int($wps * $client_target_s); - $loops = $loops + 1; - if ($loops >= 3) { - $tol = $tol * (1.2 + ($tol)); - $loops = 0; - } - last if $tol > 0.2; - } - - return ($r, $error); -} - -sub find_saturation_point -{ - my ($wrk, $rr, $verbose, @args) = @_; - my ($last_wps, $c, $swps, $wwps); - my $target = $realtime_target > 0 ? 
$realtime_target : $wps_target; - my $r = $rr; - my $wcnt; - my $maxc; - my $max = 0; - - push @args, '-v' if $multi_mode and $w_direct; - - if (defined $w_direct) { - push @args, split /\s+/, $wrk; - $wcnt = () = $wrk =~ /-[wW]/gi; - - } else { - push @args, '-w'; - push @args, $wrk_root . '/' . $wrk; - $wcnt = 1; - } - - for ($c = 1; ; $c = $c + 1) { - my ($time, $wps); - my @args_ = (@args, ('-r', $r, '-c', $c)); - - ($time, $wps, $wwps) = run_workload(@args_); - - say " $c clients is $wps wps." if $verbose; - - if ($c > 1) { - my $delta; - - if ($target <= 0) { - if ($wps > $max) { - $max = $wps; - $maxc = $c; - } - $delta = ($wps - $last_wps) / $last_wps; - if ($delta > 0) { - last if $delta < $tolerance; - } else { - $delta = ($wps - $max) / $max; - last if abs($delta) >= $tolerance; - } - } else { - $delta = ($wps / $c - $target) / $target; - last if $delta < 0 and abs($delta) >= $tolerance; - } - $r = int($rr * ($client_target_s / $time)); - } elsif ($c == 1) { - $swps = $wps; - return ($c, $wps, $swps, $wwps) if $wcnt > 1 or - $multi_mode or - ($wps_target_param < 0 and - $wps_target == 0); - } - - $last_wps = $wps; - } - - if ($target <= 0) { - return ($maxc, $max, $swps, $wwps); - } else { - return ($c - 1, $last_wps, $swps, $wwps); - } -} - -getopts('hv2xmn:b:W:B:r:t:i:R:T:w:', \%opts); - -if (defined $opts{'h'}) { - print <<ENDHELP; -Supported options: - - -h Help text. - -v Be verbose. - -x Show external commands. - -2 Run gem_wsim in GT2 mode. - -n num Nop calibration. - -b str Balancer to pre-select. - Skips balancer auto-selection. - Passed straight the gem_wsim so use like -b "-b qd -R" - -W a,b,c Override the default list of workloads. - -B a,b,c Override the default list of balancers. - -r sec Target workload duration. - -t pct Calibration tolerance. - -i pct Engine idleness tolerance. - -R wps Run workloads in the real-time mode at wps rate. - -T wps Calibrate up to wps/client target instead of GPU saturation. 
- Negative values set the target based on the single client - performance where target = single-client-wps / -N. - -w str Pass-through to gem_wsim. Overrides normal workload selection. - -m Multi-workload mode. All selected workloads will be run in - parallel and overal score will be relative to when run - individually. -ENDHELP - exit 0; -} - -$verbose = 1 if defined $opts{'v'}; -$gt2 = 1 if defined $opts{'2'}; -$show_cmds = 1 if defined $opts{'x'}; -$multi_mode = 1 if defined $opts{'m'}; -if (defined $opts{'b'}) { - die unless substr($opts{'b'}, 0, 2) eq '-b'; - $balancer = $opts{'b'}; -} -if (defined $opts{'B'}) { - @balancers = split /,/, $opts{'B'}; -} else { - unshift @balancers, ''; -} -@workloads = split /,/, $opts{'W'} if defined $opts{'W'}; -$client_target_s = $opts{'r'} if defined $opts{'r'}; -$tolerance = $opts{'t'} / 100.0 if defined $opts{'t'}; -$idle_tolerance_pct = $opts{'i'} if defined $opts{'i'}; -$realtime_target = $opts{'R'} if defined $opts{'R'}; -$wps_target = $opts{'T'} if defined $opts{'T'}; -$wps_target_param = $wps_target; -$w_direct = $opts{'w'} if defined $opts{'w'}; - -if ($multi_mode) { - die if $w_direct; # Not supported - @multi_workloads = @workloads; -} - -@workloads = ($w_direct) if defined $w_direct; - -say "Workloads:"; -print map { " $_\n" } @workloads; -print "Balancers: "; -say map { "$_," } @balancers; -say "Target workload duration is ${client_target_s}s."; -say "Calibration tolerance is $tolerance."; -say "Real-time mode at ${realtime_target} wps." if $realtime_target > 0; -say "Wps target is ${wps_target} wps." if $wps_target > 0; -say "Multi-workload mode." 
if $multi_mode; -$nop = $opts{'n'}; -$nop = calibrate_nop() unless $nop; -say "Nop calibration is $nop."; - -goto VERIFY if defined $balancer; - -my (%best_bal, %best_bid); -my %results; -my %scores; -my %wscores; -my %cscores; -my %cwscores; -my %mscores; -my %mwscores; - -sub add_points -{ - my ($wps, $scores, $wscores) = @_; - my ($min, $max, $spread); - my @sorted; - - @sorted = sort { $b <=> $a } values %{$wps}; - $max = $sorted[0]; - $min = $sorted[-1]; - $spread = $max - $min; - die if $spread < 0; - - foreach my $w (keys %{$wps}) { - my ($score, $wscore); - - unless (exists $scores->{$w}) { - $scores->{$w} = 0; - $wscores->{$w} = 0; - } - - $score = $wps->{$w} / $max; - $scores->{$w} = $scores->{$w} + $score; - $wscore = $score * $spread / $max; - $wscores->{$w} = $wscores->{$w} + $wscore; - } -} - -my @saturation_workloads = $multi_mode ? @multi_workloads : @workloads; -my %allwps; -my $widx = 0; - -push @saturation_workloads, '-w ' . join ' -w ', map("$wrk_root/$_", @workloads) - if $multi_mode; - -foreach my $wrk (@saturation_workloads) { - my @args = ( "-n $nop"); - my ($r, $error, $should_b, $best); - my (%wps, %cwps, %mwps); - my @sorted; - my $range; - - $w_direct = $wrk if $multi_mode and $widx == $#saturation_workloads; - - $should_b = 1; - $should_b = can_balance_workload($wrk) unless defined $w_direct; - - print "\nEvaluating '$wrk'..."; - - ($r, $error) = calibrate_workload($wrk); - say " ${client_target_s}s is $r workloads. 
(error=$error)"; - - say " Finding saturation points for '$wrk'..."; - - BAL: foreach my $bal (@balancers) { - GBAL: foreach my $G ('', '-G', '-d', '-G -d') { - foreach my $H ('', '-H') { - my @xargs; - my ($w, $c, $s, $bwwps); - my $bid; - - if ($bal ne '') { - next GBAL if $G =~ '-G' and exists $bal_skip_G{$bal}; - - push @xargs, "-b $bal"; - push @xargs, '-R' unless exists $bal_skip_R{$bal}; - push @xargs, $G if $G ne ''; - push @xargs, $H if $H ne ''; - $bid = join ' ', @xargs; - print " $bal balancer ('$bid'): "; - } else { - $bid = '<none>'; - print " No balancing: "; - } - - $wps_target = 0 if $wps_target_param < 0; - - ($c, $w, $s, $bwwps) = - find_saturation_point($wrk, $r, 0, - (@args, @xargs)); - - if ($wps_target_param < 0) { - $wps_target = $s / -$wps_target_param; - - ($c, $w, $s, $bwwps) = - find_saturation_point($wrk, $r, - 0, - (@args, - @xargs)); - } - - if ($multi_mode and $w_direct) { - my $widx; - - die unless scalar(@multi_workloads) == - scalar(@{$bwwps}); - die unless scalar(@multi_workloads) == - scalar(keys %allwps); - - # Total of all workload wps from the - # mixed run. - $w = 0; - foreach $widx (0..$#{$bwwps}) { - $w += $bwwps->[$widx]; - } - - # Total of all workload wps from when - # ran individually with the best - # balancer. - my $tot = 0; - foreach my $wrk (@multi_workloads) { - $tot += $allwps{$wrk}->{$best_bid{$wrk}}; - } - - # Normalize mixed sum with sum of - # individual runs. - $w *= 100; - $w /= $tot; - - # Second metric is average of each - # workload wps normalized by their - # individual run performance with the - # best balancer. 
- $s = 0; - $widx = 0; - foreach my $wrk (@multi_workloads) { - $s += 100 * $bwwps->[$widx] / - $allwps{$wrk}->{$best_bid{$wrk}}; - $widx++; - } - $s /= scalar(@multi_workloads); - - say sprintf('Aggregate (normalized) %.2f%%; fairness %.2f%%', - $w, $s); - } else { - $allwps{$wrk} = \%wps; - } - - $wps{$bid} = $w; - $cwps{$bid} = $s; - - if ($realtime_target > 0 || $wps_target_param > 0) { - $mwps{$bid} = $w * $c; - } else { - $mwps{$bid} = $w + $s; - } - - say "$c clients ($w wps, $s wps single client, score=$mwps{$bid})." - unless $multi_mode and $w_direct; - - last BAL unless $should_b; - next BAL if $bal eq ''; - next GBAL if exists $bal_skip_H{$bal}; - } - } - } - - $widx++; - - @sorted = sort { $mwps{$b} <=> $mwps{$a} } keys %mwps; - $best_bid{$wrk} = $sorted[0]; - @sorted = sort { $b <=> $a } values %mwps; - $range = 1 - $sorted[-1] / $sorted[0]; - $best_bal{$wrk} = $sorted[0]; - - next if $multi_mode and not $w_direct; - - say " Best balancer is '$best_bid{$wrk}' (range=$range)."; - - - $results{$wrk} = \%mwps; - - add_points(\%wps, \%scores, \%wscores); - add_points(\%mwps, \%mscores, \%mwscores); - add_points(\%cwps, \%cscores, \%cwscores); -} - -sub dump_scoreboard -{ - my ($n, $h) = @_; - my ($i, $str, $balancer); - my ($max, $range); - my @sorted; - - @sorted = sort { $b <=> $a } values %{$h}; - $max = $sorted[0]; - $range = 1 - $sorted[-1] / $max; - $str = "$n rank (range=$range):"; - say "\n$str"; - say '=' x length($str); - $i = 1; - foreach my $w (sort { $h->{$b} <=> $h->{$a} } keys %{$h}) { - my $score; - - $balancer = $w if $i == 1; - $score = $h->{$w} / $max; - - say " $i: '$w' ($score)"; - - $i = $i + 1; - } - - return $balancer; -} - -dump_scoreboard($multi_mode ? 'Throughput' : 'Total wps', \%scores); -dump_scoreboard('Total weighted wps', \%wscores) unless $multi_mode; -dump_scoreboard($multi_mode ? 
'Fairness' : 'Per client wps', \%cscores); -dump_scoreboard('Per client weighted wps', \%cwscores) unless $multi_mode; -$balancer = dump_scoreboard($multi_mode ? 'Combined' : 'Combined wps', \%mscores); -$balancer = dump_scoreboard('Combined weighted wps', \%mwscores) unless $multi_mode; - -VERIFY: - -my %problem_wrk; - -die unless defined $balancer; - -say "\nBalancer is '$balancer'."; -say "Idleness tolerance is $idle_tolerance_pct%."; - -if ($multi_mode) { - $w_direct = '-w ' . join ' -w ', map("$wrk_root/$_", @workloads); - @workloads = ($w_direct); -} - -foreach my $wrk (@workloads) { - my @args = ( "-n $nop" ); - my ($r, $error, $c, $wps, $swps); - my $saturated = 0; - my $result = 'Pass'; - my $vcs2 = $gt2 ? '1:0' : '2:1'; - my %problem; - my $engines; - - next if not defined $w_direct and not can_balance_workload($wrk); - - push @args, $balancer unless $balancer eq '<none>'; - - if (scalar(keys %results)) { - $r = $results{$wrk}->{$balancer} / $best_bal{$wrk} * 100.0; - } else { - $r = '---'; - } - say " \nProfiling '$wrk' ($r% of best)..."; - - ($r, $error) = calibrate_workload($wrk); - say " ${client_target_s}s is $r workloads. 
(error=$error)"; - - ($c, $wps, $swps) = find_saturation_point($wrk, $r, $verbose, @args); - say " Saturation at $c clients ($wps workloads/s)."; - push @args, "-c $c"; - - $engines = trace_workload($wrk, $balancer, $r, $c); - - foreach my $key (keys %{$engines}) { - next if $key eq 'gpu'; - $saturated = $saturated + 1 - if $engines->{$key} < $idle_tolerance_pct; - } - - if ($saturated == 0) { - # Not a single saturated engine - $result = 'FAIL'; - } elsif (not exists $engines->{'2:0'} or not exists $engines->{$vcs2}) { - # VCS1 and VCS2 not present in a balancing workload - $result = 'FAIL'; - } elsif ($saturated == 1 and - ($engines->{'2:0'} < $idle_tolerance_pct or - $engines->{$vcs2} < $idle_tolerance_pct)) { - # Only one VCS saturated - $result = 'WARN'; - } - - $result = 'WARN' if $engines->{'gpu'} > $idle_tolerance_pct; - - if ($result ne 'Pass') { - $problem{'c'} = $c; - $problem{'r'} = $r; - $problem{'stats'} = $engines; - $problem_wrk{$wrk} = \%problem; - } - - print " $result ["; - print map " $_: $engines->{$_}%,", sort keys %{$engines}; - say " ]"; -} - -say "\nProblematic workloads were:" if scalar(keys %problem_wrk) > 0; -foreach my $wrk (sort keys %problem_wrk) { - my $problem = $problem_wrk{$wrk}; - - print " $wrk -c $problem->{'c'} -r $problem->{'r'} ["; - print map " $_: $problem->{'stats'}->{$_}%,", - sort keys %{$problem->{'stats'}}; - say " ]"; -} |