summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--benchmarks/gem_wsim.c319
1 files changed, 261 insertions, 58 deletions
diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index 9156fdc9..0ef1cf28 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -23,6 +23,7 @@
*/
#include <unistd.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
@@ -55,6 +56,7 @@
#include "i915/gem_mman.h"
#include "ewma.h"
+#include "i915/gem_engine_topology.h"
enum intel_engine_id {
DEFAULT,
@@ -240,7 +242,8 @@ struct workload
struct intel_mmio_data mmio_data;
static const unsigned int nop_calibration_us = 1000;
-static unsigned long nop_calibration;
+static bool has_nop_calibration = false;
+static bool sequential = true;
static unsigned int master_prng;
@@ -281,6 +284,61 @@ static const char *ring_str_map[NUM_ENGINES] = {
[VECS] = "VECS",
};
+/* stores calibrations for particular engines */
+static unsigned long engine_calib_map[NUM_ENGINES];
+
+static enum intel_engine_id
+ci_to_engine_id(int class, int instance)
+{
+ static const struct {
+ int class;
+ int instance;
+ unsigned int id;
+ } map[] = {
+ { I915_ENGINE_CLASS_RENDER, 0, RCS },
+ { I915_ENGINE_CLASS_COPY, 0, BCS },
+ { I915_ENGINE_CLASS_VIDEO, 0, VCS1 },
+ { I915_ENGINE_CLASS_VIDEO, 1, VCS2 },
+ { I915_ENGINE_CLASS_VIDEO, 2, VCS2 }, /* FIXME/ICL */
+ { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, VECS },
+ };
+
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(map); i++) {
+ if (class == map[i].class && instance == map[i].instance)
+ return map[i].id;
+ }
+ return -1;
+}
+
+static void
+apply_unset_calibrations(unsigned long raw_number)
+{
+ for (int i = 0; i < NUM_ENGINES; i++)
+ engine_calib_map[i] += engine_calib_map[i] ? 0 : raw_number;
+}
+
+static void
+print_engine_calibrations(void)
+{
+ bool first_entry = true;
+
+ printf("Nop calibration for %uus delay is: ", nop_calibration_us);
+ for (int i = 0; i < NUM_ENGINES; i++) {
+ /* skip DEFAULT and VCS engines */
+ if (i != DEFAULT && i != VCS) {
+ if (first_entry) {
+ printf("%s=%lu", ring_str_map[i], engine_calib_map[i]);
+ first_entry = false;
+ } else {
+ printf(",%s=%lu", ring_str_map[i], engine_calib_map[i]);
+ }
+ }
+ }
+ printf("\n");
+}
+
static int
parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc)
{
@@ -1082,17 +1140,17 @@ static unsigned int get_duration(struct workload *wrk, struct w_step *w)
(dur->max + 1 - dur->min);
}
-static unsigned long get_bb_sz(unsigned int duration)
+static unsigned long get_bb_sz(unsigned int duration, enum intel_engine_id engine)
{
- return ALIGN(duration * nop_calibration * sizeof(uint32_t) /
- nop_calibration_us, sizeof(uint32_t));
+ return ALIGN(duration * engine_calib_map[engine] * sizeof(uint32_t) /
+ nop_calibration_us, sizeof(uint32_t));
}
static void
init_bb(struct w_step *w, unsigned int flags)
{
const unsigned int arb_period =
- get_bb_sz(w->preempt_us) / sizeof(uint32_t);
+ get_bb_sz(w->preempt_us, w->engine) / sizeof(uint32_t);
const unsigned int mmap_len = ALIGN(w->bb_sz, 4096);
unsigned int i;
uint32_t *ptr;
@@ -1319,10 +1377,11 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
if (w->unbound_duration)
/* nops + MI_ARB_CHK + MI_BATCH_BUFFER_START */
- w->bb_sz = max(PAGE_SIZE, get_bb_sz(w->preempt_us)) +
+ w->bb_sz = max(PAGE_SIZE, get_bb_sz(w->preempt_us, w->engine)) +
(1 + 3) * sizeof(uint32_t);
else
- w->bb_sz = get_bb_sz(w->duration.max);
+ w->bb_sz = get_bb_sz(w->duration.max, w->engine);
+
w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz + (w->unbound_duration ? 4096 : 0));
init_bb(w, flags);
w->obj[j].relocation_count = terminate_bb(w, flags);
@@ -2622,7 +2681,7 @@ do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine,
w->eb.batch_start_offset =
w->unbound_duration ?
0 :
- ALIGN(w->bb_sz - get_bb_sz(get_duration(wrk, w)),
+ ALIGN(w->bb_sz - get_bb_sz(get_duration(wrk, w), w->engine),
2 * sizeof(uint32_t));
for (i = 0; i < w->fence_deps.nr; i++) {
@@ -2899,15 +2958,18 @@ static void fini_workload(struct workload *wrk)
free(wrk);
}
-static unsigned long calibrate_nop(unsigned int tolerance_pct)
+static unsigned long calibrate_nop(unsigned int tolerance_pct, struct intel_execution_engine2 *engine)
{
const uint32_t bbe = 0xa << 23;
unsigned int loops = 17;
unsigned int usecs = nop_calibration_us;
struct drm_i915_gem_exec_object2 obj = {};
- struct drm_i915_gem_execbuffer2 eb =
- { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
- long size, last_size;
+ struct drm_i915_gem_execbuffer2 eb = {
+ .buffer_count = 1,
+ .buffers_ptr = (uintptr_t)&obj,
+ .flags = engine->flags
+ };
+ unsigned long size, last_size;
struct timespec t_0, t_end;
clock_gettime(CLOCK_MONOTONIC, &t_0);
@@ -2939,6 +3001,70 @@ static unsigned long calibrate_nop(unsigned int tolerance_pct)
return size / sizeof(uint32_t);
}
+static void
+calibrate_sequentially(void)
+{
+ struct intel_execution_engine2 *engine;
+ enum intel_engine_id eng_id;
+
+ __for_each_physical_engine(fd, engine) {
+ eng_id = ci_to_engine_id(engine->class, engine->instance);
+ igt_assert(eng_id >= 0);
+ engine_calib_map[eng_id] = calibrate_nop(fd, engine);
+ }
+}
+
+struct thread_data {
+ struct intel_execution_engine2 *eng;
+ pthread_t thr;
+ unsigned long calib;
+};
+
+static void *
+engine_calibration_thread(void *data)
+{
+ struct thread_data *thr_d = (struct thread_data *) data;
+
+ thr_d->calib = calibrate_nop(fd, thr_d->eng);
+ return NULL;
+}
+
+static void
+calibrate_in_parallel(void)
+{
+ struct thread_data *thr_d = malloc(NUM_ENGINES * sizeof(*thr_d));
+ struct intel_execution_engine2 *engine;
+ enum intel_engine_id id;
+ int ret;
+
+ __for_each_physical_engine(fd, engine) {
+ id = ci_to_engine_id(engine->class, engine->instance);
+ thr_d[id].eng = engine;
+ ret = pthread_create(&thr_d[id].thr, NULL, engine_calibration_thread, &thr_d[id]);
+ igt_assert_eq(ret, 0);
+ }
+
+ __for_each_physical_engine(fd, engine) {
+ id = ci_to_engine_id(engine->class, engine->instance);
+ igt_assert(id >= 0);
+
+ ret = pthread_join(thr_d[id].thr, NULL);
+ igt_assert_eq(ret, 0);
+ engine_calib_map[id] = thr_d[id].calib;
+ }
+
+ free(thr_d);
+}
+
+static void
+calibrate_engines(void)
+{
+ if (sequential)
+ calibrate_sequentially();
+ else
+ calibrate_in_parallel();
+}
+
static void print_help(void)
{
unsigned int i;
@@ -2951,50 +3077,53 @@ static void print_help(void)
"be provided when running the simulation in subsequent invocations.\n"
"\n"
"Options:\n"
-" -h This text.\n"
-" -q Be quiet - do not output anything to stdout.\n"
-" -n <n> Nop calibration value.\n"
-" -t <n> Nop calibration tolerance percentage.\n"
-" Use when there is a difficulty obtaining calibration with the\n"
-" default settings.\n"
-" -I <n> Initial randomness seed.\n"
-" -p <n> Context priority to use for the following workload on the\n"
-" command line.\n"
-" -w <desc|path> Filename or a workload descriptor.\n"
-" Can be given multiple times.\n"
-" -W <desc|path> Filename or a master workload descriptor.\n"
-" Only one master workload can be optinally specified in which\n"
-" case all other workloads become background ones and run as\n"
-" long as the master.\n"
-" -a <desc|path> Append a workload to all other workloads.\n"
-" -r <n> How many times to emit the workload.\n"
-" -c <n> Fork N clients emitting the workload simultaneously.\n"
-" -x Swap VCS1 and VCS2 engines in every other client.\n"
-" -b <n> Load balancing to use.\n"
-" Available load balancers are:"
+" -h This text.\n"
+" -q Be quiet - do not output anything to stdout.\n"
+" -n <n | Nop calibration value - single value is set to all engines\n"
+" e1=v1,e2=v2,n...> without specified value; you can also specify calibrations for\n"
+" particular engines.\n"
+" -t <n> Nop calibration tolerance percentage.\n"
+" -T Disable sequential calibration and perform calibration in parallel.\n"
+" Use when there is a difficulty obtaining calibration with the\n"
+" default settings.\n"
+" -I <n> Initial randomness seed.\n"
+" -p <n> Context priority to use for the following workload on the\n"
+" command line.\n"
+" -w <desc|path> Filename or a workload descriptor.\n"
+" Can be given multiple times.\n"
+" -W <desc|path> Filename or a master workload descriptor.\n"
+" Only one master workload can be optinally specified in which\n"
+" case all other workloads become background ones and run as\n"
+" long as the master.\n"
+" -a <desc|path> Append a workload to all other workloads.\n"
+" -r <n> How many times to emit the workload.\n"
+" -c <n> Fork N clients emitting the workload simultaneously.\n"
+" -x Swap VCS1 and VCS2 engines in every other client.\n"
+" -b <n> Load balancing to use.\n"
+" Available load balancers are:"
);
for (i = 0; i < ARRAY_SIZE(all_balancers); i++) {
igt_assert(all_balancers[i].desc);
printf(
-" %s (%u): %s\n",
+" %s (%u): %s\n",
all_balancers[i].name, all_balancers[i].id,
all_balancers[i].desc);
}
puts(
-" Balancers can be specified either as names or as their id\n"
-" number as listed above.\n"
-" -2 Remap VCS2 to BCS.\n"
-" -R Round-robin initial VCS assignment per client.\n"
-" -H Send heartbeat on synchronisation points with seqno based\n"
-" balancers. Gives better engine busyness view in some cases.\n"
-" -s Turn on small SSEU config for the next workload on the\n"
-" command line. Subsequent -s switches it off.\n"
-" -S Synchronize the sequence of random batch durations between\n"
-" clients.\n"
-" -G Global load balancing - a single load balancer will be shared\n"
-" between all clients and there will be a single seqno domain.\n"
-" -d Sync between data dependencies in userspace."
+" Balancers can be specified either as names or as their id\n"
+" number as listed above.\n"
+" -2 Remap VCS2 to BCS.\n"
+" -R Round-robin initial VCS assignment per client.\n"
+" -H Send heartbeat on synchronisation points with seqno based\n"
+" balancers. Gives better engine busyness view in some cases.\n"
+" -s Turn on small SSEU config for the next workload on the\n"
+" command line. Subsequent -s switches it off.\n"
+" -S Synchronize the sequence of random batch durations between\n"
+" clients.\n"
+" -G Global load balancing - a single load balancer will be shared\n"
+" between all clients and there will be a single seqno domain.\n"
+" -d Sync between data dependencies in userspace."
);
}
@@ -3117,6 +3246,10 @@ int main(int argc, char **argv)
int prio = 0;
double t;
int i, c;
+ char *subopts, *value;
+ int raw_number = 0;
+ long calib_val;
+ int eng;
/*
* Open the device via the low-level API so we can do the GPU quiesce
@@ -3134,7 +3267,7 @@ int main(int argc, char **argv)
master_prng = time(NULL);
while ((c = getopt(argc, argv,
- "hqv2RsSHxGdc:n:r:w:W:a:t:b:p:I:")) != -1) {
+ "Thqv2RsSHxGdc:n:r:w:W:a:t:b:p:I:")) != -1) {
switch (c) {
case 'W':
if (master_workload >= 0) {
@@ -3163,8 +3296,62 @@ int main(int argc, char **argv)
case 't':
tolerance_pct = strtol(optarg, NULL, 0);
break;
+ case 'T':
+ sequential = false;
+ break;
+
case 'n':
- nop_calibration = strtol(optarg, NULL, 0);
+ subopts = optarg;
+ while (*subopts != '\0') {
+ eng = getsubopt(&subopts, (char **)ring_str_map, &value);
+ if (!value) {
+ /* only engine name was given */
+ wsim_err("Missing calibration value for '%s'!\n",
+ ring_str_map[eng]);
+ goto err;
+ }
+
+ calib_val = atol(value);
+
+ if (eng >= 0 && eng < NUM_ENGINES) {
+ /* engine name with some value were given */
+
+ if (eng == DEFAULT || eng == VCS) {
+ wsim_err("'%s' not allowed in engine calibrations!\n",
+ ring_str_map[eng]);
+ goto err;
+ } else if (calib_val <= 0) {
+ wsim_err("Invalid calibration for engine '%s' - value "
+ "is either non-positive or is not a number!\n",
+ ring_str_map[eng]);
+ goto err;
+ } else if (engine_calib_map[eng]) {
+ wsim_err("Invalid repeated calibration of '%s'!\n",
+ ring_str_map[eng]);
+ goto err;
+ } else {
+ engine_calib_map[eng] = calib_val;
+ has_nop_calibration = true;
+ }
+ } else {
+ /* raw number was given */
+
+ if (!calib_val) {
+ wsim_err("Invalid engine or zero calibration!\n");
+ goto err;
+ } else if (calib_val < 0) {
+ wsim_err("Invalid negative calibration!\n");
+ goto err;
+ } else if (raw_number) {
+ wsim_err("Default engine calibration provided more than once!\n");
+ goto err;
+ } else {
+ raw_number = calib_val;
+ apply_unset_calibrations(raw_number);
+ has_nop_calibration = true;
+ }
+ }
+ }
break;
case 'r':
repeat = strtol(optarg, NULL, 0);
@@ -3242,16 +3429,33 @@ int main(int argc, char **argv)
goto err;
}
- if (!nop_calibration) {
- if (verbose > 1)
- printf("Calibrating nop delay with %u%% tolerance...\n",
+ if (!has_nop_calibration) {
+ if (verbose > 1) {
+ printf("Calibrating nop delays with %u%% tolerance...\n",
tolerance_pct);
- nop_calibration = calibrate_nop(tolerance_pct);
- if (verbose)
- printf("Nop calibration for %uus delay is %lu.\n",
- nop_calibration_us, nop_calibration);
+ }
+ calibrate_engines();
+
+ if (verbose)
+ print_engine_calibrations();
goto out;
+ } else {
+ bool missing = false;
+
+ for (i = 0; i < NUM_ENGINES; i++) {
+ if (i == DEFAULT || i == VCS)
+ continue;
+
+ if (!engine_calib_map[i]) {
+ wsim_err("Missing calibration for '%s'!\n",
+ ring_str_map[i]);
+ missing = true;
+ }
+ }
+
+ if (missing)
+ goto err;
}
if (!nr_w_args) {
@@ -3309,8 +3513,7 @@ int main(int argc, char **argv)
if (verbose > 1) {
printf("Random seed is %u.\n", master_prng);
- printf("Using %lu nop calibration for %uus delay.\n",
- nop_calibration, nop_calibration_us);
+ print_engine_calibrations();
printf("%u client%s.\n", clients, clients > 1 ? "s" : "");
if (flags & SWAPVCS)
printf("Swapping VCS rings between clients.\n");