diff options
-rw-r--r-- | benchmarks/gem_wsim.c | 319 |
1 files changed, 261 insertions, 58 deletions
diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c index 9156fdc9..0ef1cf28 100644 --- a/benchmarks/gem_wsim.c +++ b/benchmarks/gem_wsim.c @@ -23,6 +23,7 @@ */ #include <unistd.h> +#include <stdbool.h> #include <stdlib.h> #include <stdint.h> #include <stdio.h> @@ -55,6 +56,7 @@ #include "i915/gem_mman.h" #include "ewma.h" +#include "i915/gem_engine_topology.h" enum intel_engine_id { DEFAULT, @@ -240,7 +242,8 @@ struct workload struct intel_mmio_data mmio_data; static const unsigned int nop_calibration_us = 1000; -static unsigned long nop_calibration; +static bool has_nop_calibration = false; +static bool sequential = true; static unsigned int master_prng; @@ -281,6 +284,61 @@ static const char *ring_str_map[NUM_ENGINES] = { [VECS] = "VECS", }; +/* stores calibrations for particular engines */ +static unsigned long engine_calib_map[NUM_ENGINES]; + +static enum intel_engine_id +ci_to_engine_id(int class, int instance) +{ + static const struct { + int class; + int instance; + unsigned int id; + } map[] = { + { I915_ENGINE_CLASS_RENDER, 0, RCS }, + { I915_ENGINE_CLASS_COPY, 0, BCS }, + { I915_ENGINE_CLASS_VIDEO, 0, VCS1 }, + { I915_ENGINE_CLASS_VIDEO, 1, VCS2 }, + { I915_ENGINE_CLASS_VIDEO, 2, VCS2 }, /* FIXME/ICL */ + { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, VECS }, + }; + + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(map); i++) { + if (class == map[i].class && instance == map[i].instance) + return map[i].id; + } + return -1; +} + +static void +apply_unset_calibrations(unsigned long raw_number) +{ + for (int i = 0; i < NUM_ENGINES; i++) + engine_calib_map[i] += engine_calib_map[i] ? 0 : raw_number; +} + +static void +print_engine_calibrations(void) +{ + bool first_entry = true; + + printf("Nop calibration for %uus delay is: ", nop_calibration_us); + for (int i = 0; i < NUM_ENGINES; i++) { + /* skip DEFAULT and VCS engines */ + if (i != DEFAULT && i != VCS) { + if (first_entry) { + printf("%s=%lu", ring_str_map[i], engine_calib_map[i]); + first_entry = false; + } else { + printf(",%s=%lu", ring_str_map[i], engine_calib_map[i]); + } + } + } + printf("\n"); +} + static int parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc) { @@ -1082,17 +1140,17 @@ static unsigned int get_duration(struct workload *wrk, struct w_step *w) (dur->max + 1 - dur->min); } -static unsigned long get_bb_sz(unsigned int duration) +static unsigned long get_bb_sz(unsigned int duration, enum intel_engine_id engine) { - return ALIGN(duration * nop_calibration * sizeof(uint32_t) / - nop_calibration_us, sizeof(uint32_t)); + return ALIGN(duration * engine_calib_map[engine] * sizeof(uint32_t) / + nop_calibration_us, sizeof(uint32_t)); } static void init_bb(struct w_step *w, unsigned int flags) { const unsigned int arb_period = - get_bb_sz(w->preempt_us) / sizeof(uint32_t); + get_bb_sz(w->preempt_us, w->engine) / sizeof(uint32_t); const unsigned int mmap_len = ALIGN(w->bb_sz, 4096); unsigned int i; uint32_t *ptr; @@ -1319,10 +1377,11 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags) if (w->unbound_duration) /* nops + MI_ARB_CHK + MI_BATCH_BUFFER_START */ - w->bb_sz = max(PAGE_SIZE, get_bb_sz(w->preempt_us)) + + w->bb_sz = max(PAGE_SIZE, get_bb_sz(w->preempt_us, w->engine)) + (1 + 3) * sizeof(uint32_t); else - w->bb_sz = get_bb_sz(w->duration.max); + w->bb_sz = get_bb_sz(w->duration.max, w->engine); + w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz + (w->unbound_duration ? 4096 : 0)); init_bb(w, flags); w->obj[j].relocation_count = terminate_bb(w, flags); @@ -2622,7 +2681,7 @@ do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine, w->eb.batch_start_offset = w->unbound_duration ? 0 : - ALIGN(w->bb_sz - get_bb_sz(get_duration(wrk, w)), + ALIGN(w->bb_sz - get_bb_sz(get_duration(wrk, w), w->engine), 2 * sizeof(uint32_t)); for (i = 0; i < w->fence_deps.nr; i++) { @@ -2899,15 +2958,18 @@ static void fini_workload(struct workload *wrk) free(wrk); } -static unsigned long calibrate_nop(unsigned int tolerance_pct) +static unsigned long calibrate_nop(unsigned int tolerance_pct, struct intel_execution_engine2 *engine) { const uint32_t bbe = 0xa << 23; unsigned int loops = 17; unsigned int usecs = nop_calibration_us; struct drm_i915_gem_exec_object2 obj = {}; - struct drm_i915_gem_execbuffer2 eb = - { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj}; - long size, last_size; + struct drm_i915_gem_execbuffer2 eb = { + .buffer_count = 1, + .buffers_ptr = (uintptr_t)&obj, + .flags = engine->flags + }; + unsigned long size, last_size; struct timespec t_0, t_end; clock_gettime(CLOCK_MONOTONIC, &t_0); @@ -2939,6 +3001,70 @@ static unsigned long calibrate_nop(unsigned int tolerance_pct) return size / sizeof(uint32_t); } +static void +calibrate_sequentially(void) +{ + struct intel_execution_engine2 *engine; + enum intel_engine_id eng_id; + + __for_each_physical_engine(fd, engine) { + eng_id = ci_to_engine_id(engine->class, engine->instance); + igt_assert(eng_id >= 0); + engine_calib_map[eng_id] = calibrate_nop(fd, engine); + } +} + +struct thread_data { + struct intel_execution_engine2 *eng; + pthread_t thr; + unsigned long calib; +}; + +static void * +engine_calibration_thread(void *data) +{ + struct thread_data *thr_d = (struct thread_data *) data; + + thr_d->calib = calibrate_nop(fd, thr_d->eng); + return NULL; +} + +static void +calibrate_in_parallel(void) +{ + struct thread_data *thr_d = malloc(NUM_ENGINES * sizeof(*thr_d)); + struct intel_execution_engine2 *engine; + enum intel_engine_id id; + int ret; + + __for_each_physical_engine(fd, engine) { + id = ci_to_engine_id(engine->class, engine->instance); + thr_d[id].eng = engine; + ret = pthread_create(&thr_d[id].thr, NULL, engine_calibration_thread, &thr_d[id]); + igt_assert_eq(ret, 0); + } + + __for_each_physical_engine(fd, engine) { + id = ci_to_engine_id(engine->class, engine->instance); + igt_assert(id >= 0); + + ret = pthread_join(thr_d[id].thr, NULL); + igt_assert_eq(ret, 0); + engine_calib_map[id] = thr_d[id].calib; + } + + free(thr_d); +} + +static void +calibrate_engines(void) +{ + if (sequential) + calibrate_sequentially(); + else + calibrate_in_parallel(); +} + static void print_help(void) { unsigned int i; @@ -2951,50 +3077,53 @@ static void print_help(void) "be provided when running the simulation in subsequent invocations.\n" "\n" "Options:\n" -" -h This text.\n" -" -q Be quiet - do not output anything to stdout.\n" -" -n <n> Nop calibration value.\n" -" -t <n> Nop calibration tolerance percentage.\n" -" Use when there is a difficulty obtaining calibration with the\n" -" default settings.\n" -" -I <n> Initial randomness seed.\n" -" -p <n> Context priority to use for the following workload on the\n" -" command line.\n" -" -w <desc|path> Filename or a workload descriptor.\n" -" Can be given multiple times.\n" -" -W <desc|path> Filename or a master workload descriptor.\n" -" Only one master workload can be optinally specified in which\n" -" case all other workloads become background ones and run as\n" -" long as the master.\n" -" -a <desc|path> Append a workload to all other workloads.\n" -" -r <n> How many times to emit the workload.\n" -" -c <n> Fork N clients emitting the workload simultaneously.\n" -" -x Swap VCS1 and VCS2 engines in every other client.\n" -" -b <n> Load balancing to use.\n" -" Available load balancers are:" +" -h This text.\n" +" -q Be quiet - do not output anything to stdout.\n" +" -n <n | Nop calibration value - single value is set to all engines\n" +" e1=v1,e2=v2,n...> without specified value; you can also specify calibrations for\n" +" particular engines.\n" +" -t <n> Nop calibration tolerance percentage.\n" +" -T Disable sequential calibration and perform calibration in parallel.\n" +" Use when there is a difficulty obtaining calibration with the\n" +" default settings.\n" +" -I <n> Initial randomness seed.\n" +" -p <n> Context priority to use for the following workload on the\n" +" command line.\n" +" -w <desc|path> Filename or a workload descriptor.\n" +" Can be given multiple times.\n" +" -W <desc|path> Filename or a master workload descriptor.\n" +" Only one master workload can be optinally specified in which\n" +" case all other workloads become background ones and run as\n" +" long as the master.\n" +" -a <desc|path> Append a workload to all other workloads.\n" +" -r <n> How many times to emit the workload.\n" +" -c <n> Fork N clients emitting the workload simultaneously.\n" +" -x Swap VCS1 and VCS2 engines in every other client.\n" +" -b <n> Load balancing to use.\n" +" Available load balancers are:" ); for (i = 0; i < ARRAY_SIZE(all_balancers); i++) { igt_assert(all_balancers[i].desc); printf( -" %s (%u): %s\n", +" %s (%u): %s\n", all_balancers[i].name, all_balancers[i].id, all_balancers[i].desc); } puts( -" Balancers can be specified either as names or as their id\n" -" number as listed above.\n" -" -2 Remap VCS2 to BCS.\n" -" -R Round-robin initial VCS assignment per client.\n" -" -H Send heartbeat on synchronisation points with seqno based\n" -" balancers. Gives better engine busyness view in some cases.\n" -" -s Turn on small SSEU config for the next workload on the\n" -" command line. Subsequent -s switches it off.\n" -" -S Synchronize the sequence of random batch durations between\n" -" clients.\n" -" -G Global load balancing - a single load balancer will be shared\n" -" between all clients and there will be a single seqno domain.\n" -" -d Sync between data dependencies in userspace." +" Balancers can be specified either as names or as their id\n" +" number as listed above.\n" +" -2 Remap VCS2 to BCS.\n" +" -R Round-robin initial VCS assignment per client.\n" +" -H Send heartbeat on synchronisation points with seqno based\n" +" balancers. Gives better engine busyness view in some cases.\n" +" -s Turn on small SSEU config for the next workload on the\n" +" command line. Subsequent -s switches it off.\n" +" -S Synchronize the sequence of random batch durations between\n" +" clients.\n" +" -G Global load balancing - a single load balancer will be shared\n" +" between all clients and there will be a single seqno domain.\n" +" -d Sync between data dependencies in userspace." ); } @@ -3117,6 +3246,10 @@ int main(int argc, char **argv) int prio = 0; double t; int i, c; + char *subopts, *value; + int raw_number = 0; + long calib_val; + int eng; /* * Open the device via the low-level API so we can do the GPU quiesce @@ -3134,7 +3267,7 @@ int main(int argc, char **argv) master_prng = time(NULL); while ((c = getopt(argc, argv, - "hqv2RsSHxGdc:n:r:w:W:a:t:b:p:I:")) != -1) { + "Thqv2RsSHxGdc:n:r:w:W:a:t:b:p:I:")) != -1) { switch (c) { case 'W': if (master_workload >= 0) { @@ -3163,8 +3296,62 @@ int main(int argc, char **argv) case 't': tolerance_pct = strtol(optarg, NULL, 0); break; + case 'T': + sequential = false; + break; + case 'n': - nop_calibration = strtol(optarg, NULL, 0); + subopts = optarg; + while (*subopts != '\0') { + eng = getsubopt(&subopts, (char **)ring_str_map, &value); + if (!value) { + /* only engine name was given */ + wsim_err("Missing calibration value for '%s'!\n", + ring_str_map[eng]); + goto err; + } + + calib_val = atol(value); + + if (eng >= 0 && eng < NUM_ENGINES) { + /* engine name with some value were given */ + + if (eng == DEFAULT || eng == VCS) { + wsim_err("'%s' not allowed in engine calibrations!\n", + ring_str_map[eng]); + goto err; + } else if (calib_val <= 0) { + wsim_err("Invalid calibration for engine '%s' - value " + "is either non-positive or is not a number!\n", + ring_str_map[eng]); + goto err; + } else if (engine_calib_map[eng]) { + wsim_err("Invalid repeated calibration of '%s'!\n", + ring_str_map[eng]); + goto err; + } else { + engine_calib_map[eng] = calib_val; + has_nop_calibration = true; + } + } else { + /* raw number was given */ + + if (!calib_val) { + wsim_err("Invalid engine or zero calibration!\n"); + goto err; + } else if (calib_val < 0) { + wsim_err("Invalid negative calibration!\n"); + goto err; + } else if (raw_number) { + wsim_err("Default engine calibration provided more than once!\n"); + goto err; + } else { + raw_number = calib_val; + apply_unset_calibrations(raw_number); + has_nop_calibration = true; + } + } + } break; case 'r': repeat = strtol(optarg, NULL, 0); @@ -3242,16 +3429,33 @@ int main(int argc, char **argv) goto err; } - if (!nop_calibration) { - if (verbose > 1) - printf("Calibrating nop delay with %u%% tolerance...\n", + if (!has_nop_calibration) { + if (verbose > 1) { + printf("Calibrating nop delays with %u%% tolerance...\n", tolerance_pct); - nop_calibration = calibrate_nop(tolerance_pct); - if (verbose) - printf("Nop calibration for %uus delay is %lu.\n", - nop_calibration_us, nop_calibration); + } + calibrate_engines(); + + if (verbose) + print_engine_calibrations(); goto out; + } else { + bool missing = false; + + for (i = 0; i < NUM_ENGINES; i++) { + if (i == DEFAULT || i == VCS) + continue; + + if (!engine_calib_map[i]) { + wsim_err("Missing calibration for '%s'!\n", + ring_str_map[i]); + missing = true; + } + } + + if (missing) + goto err; } if (!nr_w_args) { @@ -3309,8 +3513,7 @@ int main(int argc, char **argv) if (verbose > 1) { printf("Random seed is %u.\n", master_prng); - printf("Using %lu nop calibration for %uus delay.\n", - nop_calibration, nop_calibration_us); + print_engine_calibrations(); printf("%u client%s.\n", clients, clients > 1 ? "s" : ""); if (flags & SWAPVCS) printf("Swapping VCS rings between clients.\n"); |