From 20d89b417e0bceb79cd80456838b1e91662d445e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 18 Sep 2017 09:06:19 +0100 Subject: tests/perf_pmu: Tests for i915 PMU API A bunch of tests for the new i915 PMU feature. Parts of the code were initially sketched by Dmitry Rogozhkin. v2: (Most suggestions by Chris Wilson) * Add new class/instance based engine list. * Add gem_has_engine/gem_require_engine to work with class/instance. * Use the above two throughout the test. * Shorten tests to 100ms busy batches, seems enough. * Add queued counter sanity checks. * Use igt_nsec_elapsed. * Skip on perf -ENODEV in some tests instead of embedding knowledge locally. * Fix multi ordering for busy accounting. * Use new guaranteed_usleep when sleep time is asserted on. * Check for no queued when idle/busy. * Add queued counter init test. * Add queued tests. * Consolidate and increase multiple busy engines tests to most-busy and all-busy tests. * Guarantee interrupts by using fences. * Test RC6 via forcewake. v3: * Tweak assert in interrupts subtest. * Sprinkle of comments. * Fix multi-client test which got broken in v2. v4: * Measured instead of guaranteed sleep. * Missing sync in no_sema. * Log busyness before asserts for debug. * access(2) instead of open(2) to determine if cpu0 is hotpluggable. * Test frequency reporting via min/max setting instead of assuming. ^^ All above suggested by Chris Wilson. ^^ * Drop queued subtests to match i915. * Use long batches with fences to ensure interrupts. * Test render node as well. v5: * Add to meson build. (Petri Latvala) * Use 1eN constants. (Chris Wilson) * Add tests for semaphore and event waiting. v6: * Fix interrupts subtest by polling the fence from the "outside". (Chris Wilson) v7: * Assert number of initialized engines matches the expectation. (Chris Wilson) * Warn instead of skipping if we couldn't restore the initial frequency. (Chris Wilson) * Move all asserts to after the test cleanup (just a tidy). 
* More 1eN notation for timeouts. * Bump the tolerance to 5% since I saw a few noisy runs with sampling counters. * Always start the PMU before submitting batches to lower reliance on i915 doing the delayed engine busy stats disable. v8: * Update for upstream engine class enum. v9: * Add meson build support. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Cc: Dmitry Rogozhkin Reviewed-by: Chris Wilson --- lib/igt_gt.c | 50 ++ lib/igt_gt.h | 38 ++ lib/igt_perf.h | 9 +- tests/Makefile.am | 1 + tests/Makefile.sources | 1 + tests/meson.build | 6 + tests/perf_pmu.c | 1242 ++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 1339 insertions(+), 8 deletions(-) create mode 100644 tests/perf_pmu.c diff --git a/lib/igt_gt.c b/lib/igt_gt.c index 64a2dfd6..4a8f541f 100644 --- a/lib/igt_gt.c +++ b/lib/igt_gt.c @@ -608,3 +608,53 @@ bool gem_can_store_dword(int fd, unsigned int engine) return true; } + +const struct intel_execution_engine2 intel_execution_engines2[] = { + { "rcs0", I915_ENGINE_CLASS_RENDER, 0 }, + { "bcs0", I915_ENGINE_CLASS_COPY, 0 }, + { "vcs0", I915_ENGINE_CLASS_VIDEO, 0 }, + { "vcs1", I915_ENGINE_CLASS_VIDEO, 1 }, + { "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0 }, + { } /* NULL name terminates for_each_engine_class_instance() */ +}; + +unsigned int +gem_class_instance_to_eb_flags(int gem_fd, + enum drm_i915_gem_engine_class class, + unsigned int instance) +{ + if (class != I915_ENGINE_CLASS_VIDEO) + igt_assert(instance == 0); + else + igt_assert(instance <= 1); /* only vcs0 and vcs1 are listed */ + + switch (class) { + case I915_ENGINE_CLASS_RENDER: + return I915_EXEC_RENDER; + case I915_ENGINE_CLASS_COPY: + return I915_EXEC_BLT; + case I915_ENGINE_CLASS_VIDEO: + if (instance == 0) { + if (gem_has_bsd2(gem_fd)) + return I915_EXEC_BSD | I915_EXEC_BSD_RING1; + else + return I915_EXEC_BSD; + } else { + return I915_EXEC_BSD | I915_EXEC_BSD_RING2; + } + case I915_ENGINE_CLASS_VIDEO_ENHANCE: + return I915_EXEC_VEBOX; + case I915_ENGINE_CLASS_INVALID: + default: + igt_assert(0); + }; +} + +bool gem_has_engine(int gem_fd, + enum 
drm_i915_gem_engine_class class, + unsigned int instance) +{ + return gem_has_ring(gem_fd, + gem_class_instance_to_eb_flags(gem_fd, class, + instance)); +} diff --git a/lib/igt_gt.h b/lib/igt_gt.h index 2579cbd3..48ed48af 100644 --- a/lib/igt_gt.h +++ b/lib/igt_gt.h @@ -25,6 +25,7 @@ #define IGT_GT_H #include "igt_debugfs.h" +#include "igt_core.h" void igt_require_hang_ring(int fd, int ring); @@ -80,4 +81,41 @@ extern const struct intel_execution_engine { bool gem_can_store_dword(int fd, unsigned int engine); +extern const struct intel_execution_engine2 { + const char *name; + int class; + int instance; +} intel_execution_engines2[]; + +#define for_each_engine_class_instance(fd__, e__) \ + for ((e__) = intel_execution_engines2;\ + (e__)->name; \ + (e__)++) + +enum drm_i915_gem_engine_class { + I915_ENGINE_CLASS_RENDER = 0, + I915_ENGINE_CLASS_COPY = 1, + I915_ENGINE_CLASS_VIDEO = 2, + I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, + + I915_ENGINE_CLASS_INVALID = -1 +}; + +unsigned int +gem_class_instance_to_eb_flags(int gem_fd, + enum drm_i915_gem_engine_class class, + unsigned int instance); + +bool gem_has_engine(int gem_fd, + enum drm_i915_gem_engine_class class, + unsigned int instance); + +static inline +void gem_require_engine(int gem_fd, + enum drm_i915_gem_engine_class class, + unsigned int instance) +{ + igt_require(gem_has_engine(gem_fd, class, instance)); +} + #endif /* IGT_GT_H */ diff --git a/lib/igt_perf.h b/lib/igt_perf.h index 938d5488..5428feb0 100644 --- a/lib/igt_perf.h +++ b/lib/igt_perf.h @@ -29,14 +29,7 @@ #include -enum drm_i915_gem_engine_class { - I915_ENGINE_CLASS_RENDER = 0, - I915_ENGINE_CLASS_COPY = 1, - I915_ENGINE_CLASS_VIDEO = 2, - I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, - - I915_ENGINE_CLASS_INVALID = -1 -}; +#include "igt_gt.h" enum drm_i915_pmu_engine_sample { I915_SAMPLE_BUSY = 0, diff --git a/tests/Makefile.am b/tests/Makefile.am index a4a16838..a790d8f9 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -132,6 +132,7 @@ 
gen7_forcewake_mt_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) gen7_forcewake_mt_LDADD = $(LDADD) -lpthread gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) gem_userptr_blits_LDADD = $(LDADD) -lpthread +perf_pmu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la gem_wait_LDADD = $(LDADD) -lrt kms_flip_LDADD = $(LDADD) -lrt -lpthread diff --git a/tests/Makefile.sources b/tests/Makefile.sources index e03d8235..b4d4831e 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -217,6 +217,7 @@ TESTS_progs = \ kms_vblank \ meta_test \ perf \ + perf_pmu \ pm_backlight \ pm_lpsp \ pm_rc6_residency \ diff --git a/tests/meson.build b/tests/meson.build index 5af4e515..dd2abd79 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -195,6 +195,7 @@ test_progs = [ 'kms_vblank', 'meta_test', 'perf', + 'perf_pmu', 'pm_backlight', 'pm_lpsp', 'pm_rc6_residency', @@ -262,9 +263,14 @@ endif libexecdir = join_paths(get_option('prefix'), get_option('libexecdir'), 'intel-gpu-tools') foreach prog : test_progs + link = [] + if prog == 'perf_pmu' + link += lib_igt_perf + endif executable(prog, prog + '.c', dependencies : test_deps, install_dir : libexecdir, + link_with : link, install : true) endforeach diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c new file mode 100644 index 00000000..8585ed7b --- /dev/null +++ b/tests/perf_pmu.c @@ -0,0 +1,1242 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies 
or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "igt.h" +#include "igt_core.h" +#include "igt_perf.h" +#include "igt_sysfs.h" + +IGT_TEST_DESCRIPTION("Test the i915 pmu perf interface"); + +const double tolerance = 0.05f; +const unsigned long batch_duration_ns = 100e6; + +static int open_pmu(uint64_t config) +{ + int fd; + + fd = perf_i915_open(config); + igt_require(fd >= 0 || (fd < 0 && errno != ENODEV)); + igt_assert(fd >= 0); + + return fd; +} + +static int open_group(uint64_t config, int group) +{ + int fd; + + fd = perf_i915_open_group(config, group); + igt_require(fd >= 0 || (fd < 0 && errno != ENODEV)); + igt_assert(fd >= 0); + + return fd; +} + +static void +init(int gem_fd, const struct intel_execution_engine2 *e, uint8_t sample) +{ + int fd; + + fd = open_pmu(__I915_PMU_ENGINE(e->class, e->instance, sample)); + + close(fd); +} + +static uint64_t pmu_read_single(int fd) +{ + uint64_t data[2]; + + igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data)); + + return data[0]; +} + +static void pmu_read_multi(int fd, unsigned int num, uint64_t *val) +{ + uint64_t buf[2 + num]; + unsigned int i; + + igt_assert_eq(read(fd, buf, sizeof(buf)), sizeof(buf)); + + for (i = 0; i < num; i++) + val[i] = buf[2 + i]; +} + +#define assert_within_epsilon(x, ref, tolerance) \ + igt_assert_f((double)(x) <= (1.0 + tolerance) * (double)ref && \ 
+ (double)(x) >= (1.0 - tolerance) * (double)ref, \ + "'%s' != '%s' (%f not within %f%% tolerance of %f)\n",\ + #x, #ref, (double)x, tolerance * 100.0, (double)ref) + +/* + * Sleep for @usec microseconds (retrying if usleep returns early) and return + * the measured sleep time in nanoseconds, for tests asserting on time spent + * sleeping (directly or indirectly). + */ +static unsigned int measured_usleep(unsigned int usec) +{ + uint64_t slept = 0; + + while (usec > 0) { + struct timespec start = { }; + uint64_t this_sleep; + + igt_nsec_elapsed(&start); + usleep(usec); + this_sleep = igt_nsec_elapsed(&start); + slept += this_sleep; + if (this_sleep > (uint64_t)usec * 1000) /* compare in ns */ + break; + usec -= this_sleep / 1000; /* ns -> us before subtracting */ + } + + return slept; +} + +static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e) +{ + return gem_class_instance_to_eb_flags(gem_fd, e->class, e->instance); +} + +static void +single(int gem_fd, const struct intel_execution_engine2 *e, bool busy) +{ + double ref = busy ? batch_duration_ns : 0.0f; + igt_spin_t *spin; + uint64_t val; + int fd; + + fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance)); + + if (busy) { + spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0); + igt_spin_batch_set_timeout(spin, batch_duration_ns); + } else { + usleep(batch_duration_ns / 1000); + } + + if (busy) + gem_sync(gem_fd, spin->handle); + + val = pmu_read_single(fd); + + if (busy) + igt_spin_batch_free(gem_fd, spin); + close(fd); + + assert_within_epsilon(val, ref, tolerance); +} + +static void log_busy(int fd, unsigned int num_engines, uint64_t *val) +{ + char buf[1024]; + int rem = sizeof(buf); + unsigned int i; + char *p = buf; + + for (i = 0; i < num_engines; i++) { + int len; + + len = snprintf(p, rem, "%u=%" PRIu64 "\n", i, val[i]); + igt_assert(len > 0 && len < rem); /* reject truncation */ + rem -= len; + p += len; + } + + igt_info("%s", buf); +} + +static void +busy_check_all(int gem_fd, const struct intel_execution_engine2 *e, + const unsigned int num_engines) +{ + const struct 
intel_execution_engine2 *e_; + uint64_t val[num_engines]; + int fd[num_engines]; + igt_spin_t *spin; + unsigned int busy_idx, i; + + gem_require_engine(gem_fd, e->class, e->instance); + i = 0; + fd[0] = -1; + for_each_engine_class_instance(fd, e_) { + if (!gem_has_engine(gem_fd, e_->class, e_->instance)) + continue; + else if (e == e_) + busy_idx = i; /* always set: e was required above */ + + fd[i++] = open_group(I915_PMU_ENGINE_BUSY(e_->class, + e_->instance), + fd[0]); + } + + igt_assert_eq(i, num_engines); + + spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0); + igt_spin_batch_set_timeout(spin, batch_duration_ns); + + gem_sync(gem_fd, spin->handle); + + pmu_read_multi(fd[0], num_engines, val); + log_busy(fd[0], num_engines, val); + + igt_spin_batch_free(gem_fd, spin); + close(fd[0]); + + assert_within_epsilon(val[busy_idx], batch_duration_ns, tolerance); + for (i = 0; i < num_engines; i++) { + if (i == busy_idx) + continue; + assert_within_epsilon(val[i], 0.0f, tolerance); + } +} + +static void +most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e, + const unsigned int num_engines) +{ + const struct intel_execution_engine2 *e_; + uint64_t val[num_engines]; + int fd[num_engines]; + igt_spin_t *spin[num_engines]; + unsigned int idle_idx, i; + + gem_require_engine(gem_fd, e->class, e->instance); + + i = 0; + fd[0] = -1; + for_each_engine_class_instance(fd, e_) { + if (!gem_has_engine(gem_fd, e_->class, e_->instance)) + continue; + + fd[i] = open_group(I915_PMU_ENGINE_BUSY(e_->class, + e_->instance), + fd[0]); + + if (e == e_) { + idle_idx = i; + } else { + spin[i] = igt_spin_batch_new(gem_fd, 0, + e2ring(gem_fd, e_), 0); + igt_spin_batch_set_timeout(spin[i], batch_duration_ns); + } + + i++; + } + + for (i = 0; i < num_engines; i++) { + if (i != idle_idx) + gem_sync(gem_fd, spin[i]->handle); + } + + pmu_read_multi(fd[0], num_engines, val); + log_busy(fd[0], num_engines, val); + + for (i = 0; i < num_engines; i++) { + if (i != idle_idx) + igt_spin_batch_free(gem_fd, spin[i]); + } + close(fd[0]); + + for (i = 0; i < 
num_engines; i++) { + if (i == idle_idx) + assert_within_epsilon(val[i], 0.0f, tolerance); + else + assert_within_epsilon(val[i], batch_duration_ns, + tolerance); + } +} + +static void +all_busy_check_all(int gem_fd, const unsigned int num_engines) +{ + const struct intel_execution_engine2 *e; + uint64_t val[num_engines]; + int fd[num_engines]; + igt_spin_t *spin[num_engines]; + unsigned int i; + + i = 0; + fd[0] = -1; + for_each_engine_class_instance(fd, e) { + if (!gem_has_engine(gem_fd, e->class, e->instance)) + continue; + + fd[i] = open_group(I915_PMU_ENGINE_BUSY(e->class, e->instance), + fd[0]); + + spin[i] = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0); + igt_spin_batch_set_timeout(spin[i], batch_duration_ns); + + i++; + } + + for (i = 0; i < num_engines; i++) + gem_sync(gem_fd, spin[i]->handle); + + pmu_read_multi(fd[0], num_engines, val); + log_busy(fd[0], num_engines, val); + + for (i = 0; i < num_engines; i++) + igt_spin_batch_free(gem_fd, spin[i]); + close(fd[0]); + + for (i = 0; i < num_engines; i++) + assert_within_epsilon(val[i], batch_duration_ns, tolerance); +} + +static void +no_sema(int gem_fd, const struct intel_execution_engine2 *e, bool busy) +{ + igt_spin_t *spin; + uint64_t val[2]; + int fd; + + fd = open_group(I915_PMU_ENGINE_SEMA(e->class, e->instance), -1); + open_group(I915_PMU_ENGINE_WAIT(e->class, e->instance), fd); + + if (busy) { + spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0); + igt_spin_batch_set_timeout(spin, batch_duration_ns); + } else { + usleep(batch_duration_ns / 1000); + } + + if (busy) + gem_sync(gem_fd, spin->handle); + + pmu_read_multi(fd, 2, val); + + if (busy) + igt_spin_batch_free(gem_fd, spin); + close(fd); + + assert_within_epsilon(val[0], 0.0f, tolerance); + assert_within_epsilon(val[1], 0.0f, tolerance); +} + +#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags)) +#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ +#define MI_SEMAPHORE_POLL (1<<15) +#define MI_SEMAPHORE_SAD_GTE_SDD 
(1<<12) + +static void +sema_wait(int gem_fd, const struct intel_execution_engine2 *e) +{ + struct drm_i915_gem_relocation_entry reloc = { }; + struct drm_i915_gem_execbuffer2 eb = { }; + struct drm_i915_gem_exec_object2 obj[2]; + uint32_t bb_handle, obj_handle; + unsigned long slept; + uint32_t *obj_ptr; + uint32_t batch[6]; + uint64_t val[2]; + int fd; + + igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8); + + /** + * Set up a batchbuffer with a polling semaphore wait command which + * will wait on a value in a shared bo to change. This way we are able + * to control how much time we will spend in this bb. + */ + + bb_handle = gem_create(gem_fd, 4096); + obj_handle = gem_create(gem_fd, 4096); + + obj_ptr = gem_mmap__wc(gem_fd, obj_handle, 0, 4096, PROT_WRITE); + + batch[0] = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_GTE_SDD; + batch[1] = 1; + batch[2] = 0x0; + batch[3] = 0x0; + batch[4] = MI_NOOP; + batch[5] = MI_BATCH_BUFFER_END; + + gem_write(gem_fd, bb_handle, 0, batch, sizeof(batch)); + + reloc.target_handle = obj_handle; + reloc.offset = 2 * sizeof(uint32_t); + reloc.read_domains = I915_GEM_DOMAIN_RENDER; + + memset(obj, 0, sizeof(obj)); + + obj[0].handle = obj_handle; + + obj[1].handle = bb_handle; + obj[1].relocation_count = 1; + obj[1].relocs_ptr = to_user_pointer(&reloc); + + eb.buffer_count = 2; + eb.buffers_ptr = to_user_pointer(obj); + eb.flags = e2ring(gem_fd, e); + + /** + * Start the semaphore wait PMU and after some known time let the above + * semaphore wait command finish. Then check that the PMU is reporting + * the expected time spent in semaphore wait state. 
+ */ + + fd = open_pmu(I915_PMU_ENGINE_SEMA(e->class, e->instance)); + + val[0] = pmu_read_single(fd); + + gem_execbuf(gem_fd, &eb); + + slept = measured_usleep(100e3); + + *obj_ptr = 1; + + gem_sync(gem_fd, bb_handle); + + val[1] = pmu_read_single(fd); + + munmap(obj_ptr, 4096); + gem_close(gem_fd, obj_handle); + gem_close(gem_fd, bb_handle); + close(fd); + + assert_within_epsilon(val[1] - val[0], slept, tolerance); +} + +#define MI_WAIT_FOR_PIPE_C_VBLANK (1<<21) +#define MI_WAIT_FOR_PIPE_B_VBLANK (1<<11) +#define MI_WAIT_FOR_PIPE_A_VBLANK (1<<3) + +typedef struct { + igt_display_t display; + struct igt_fb primary_fb; + igt_output_t *output; + enum pipe pipe; +} data_t; + +static void prepare_crtc(data_t *data, int fd, igt_output_t *output) +{ + drmModeModeInfo *mode; + igt_display_t *display = &data->display; + igt_plane_t *primary; + + /* select the pipe we want to use */ + igt_output_set_pipe(output, data->pipe); + + /* create and set the primary plane fb */ + mode = igt_output_get_mode(output); + igt_create_color_fb(fd, mode->hdisplay, mode->vdisplay, + DRM_FORMAT_XRGB8888, + LOCAL_DRM_FORMAT_MOD_NONE, + 0.0, 0.0, 0.0, + &data->primary_fb); + + primary = igt_output_get_plane_type(output, DRM_PLANE_TYPE_PRIMARY); + igt_plane_set_fb(primary, &data->primary_fb); + + igt_display_commit(display); + + igt_wait_for_vblank(fd, data->pipe); +} + +static void cleanup_crtc(data_t *data, int fd, igt_output_t *output) +{ + igt_display_t *display = &data->display; + igt_plane_t *primary; + + igt_remove_fb(fd, &data->primary_fb); + + primary = igt_output_get_plane_type(output, DRM_PLANE_TYPE_PRIMARY); + igt_plane_set_fb(primary, NULL); + + igt_output_set_pipe(output, PIPE_ANY); + igt_display_commit(display); +} + +static int wait_vblank(int fd, union drm_wait_vblank *vbl) +{ + int err; + + err = 0; + if (igt_ioctl(fd, DRM_IOCTL_WAIT_VBLANK, vbl)) + err = -errno; + + return err; +} + +static void +event_wait(int gem_fd, const struct intel_execution_engine2 *e) +{ + struct 
drm_i915_gem_exec_object2 obj = { }; + struct drm_i915_gem_execbuffer2 eb = { }; + data_t data; + igt_display_t *display = &data.display; + const uint32_t DERRMR = 0x44050; + unsigned int valid_tests = 0; + uint32_t batch[8], *b; + igt_output_t *output; + uint32_t bb_handle; + uint32_t reg; + enum pipe p; + int fd; + + igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 6); + igt_require(intel_register_access_init(intel_get_pci_device(), + false, gem_fd) == 0); + + /** + * We will use the display to render event forwarind so need to + * program the DERRMR register and restore it at exit. + * + * We will emit a MI_WAIT_FOR_EVENT listening for vblank events, + * have a background helper to indirectly enable vblank irqs, and + * listen to the recorded time spent in engine wait state as reported + * by the PMU. + */ + reg = intel_register_read(DERRMR); + + kmstest_set_vt_graphics_mode(); + igt_display_init(&data.display, gem_fd); + + bb_handle = gem_create(gem_fd, 4096); + + b = batch; + *b++ = MI_LOAD_REGISTER_IMM; + *b++ = DERRMR; + *b++ = reg & ~((1 << 3) | (1 << 11) | (1 << 21)); + *b++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PIPE_A_VBLANK; + *b++ = MI_LOAD_REGISTER_IMM; + *b++ = DERRMR; + *b++ = reg; + *b++ = MI_BATCH_BUFFER_END; + + obj.handle = bb_handle; + + eb.buffer_count = 1; + eb.buffers_ptr = to_user_pointer(&obj); + eb.flags = e2ring(gem_fd, e) | I915_EXEC_SECURE; + + for_each_pipe_with_valid_output(display, p, output) { + struct igt_helper_process waiter = { }; + const unsigned int frames = 3; + unsigned int frame; + uint64_t val[2]; + + batch[3] = MI_WAIT_FOR_EVENT; + switch (p) { + case PIPE_A: + batch[3] |= MI_WAIT_FOR_PIPE_A_VBLANK; + break; + case PIPE_B: + batch[3] |= MI_WAIT_FOR_PIPE_B_VBLANK; + break; + case PIPE_C: + batch[3] |= MI_WAIT_FOR_PIPE_C_VBLANK; + break; + default: + continue; + } + + gem_write(gem_fd, bb_handle, 0, batch, sizeof(batch)); + + data.pipe = p; + prepare_crtc(&data, gem_fd, output); + + fd = 
open_pmu(I915_PMU_ENGINE_WAIT(e->class, e->instance)); + + val[0] = pmu_read_single(fd); + + igt_fork_helper(&waiter) { + const uint32_t pipe_id_flag = + kmstest_get_vbl_flag(data.pipe); + + for (;;) { + union drm_wait_vblank vbl = { }; + + vbl.request.type = DRM_VBLANK_RELATIVE; + vbl.request.type |= pipe_id_flag; + vbl.request.sequence = 1; + igt_assert_eq(wait_vblank(gem_fd, &vbl), 0); + } + } + + for (frame = 0; frame < frames; frame++) { + gem_execbuf(gem_fd, &eb); + gem_sync(gem_fd, bb_handle); + } + + igt_stop_helper(&waiter); + + val[1] = pmu_read_single(fd); + + close(fd); + + cleanup_crtc(&data, gem_fd, output); + valid_tests++; + + igt_assert(val[1] - val[0] > 0); + } + + gem_close(gem_fd, bb_handle); + + intel_register_access_fini(); + + igt_require_f(valid_tests, + "no valid crtc/connector combinations found\n"); +} + +static void +multi_client(int gem_fd, const struct intel_execution_engine2 *e) +{ + uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance); + unsigned int slept; + igt_spin_t *spin; + uint64_t val[2]; + int fd[2]; + + fd[0] = open_pmu(config); + + /* + * Second PMU client which is initialized after the first one, + * and exits before it, should not affect accounting as reported + * in the first client. + */ + fd[1] = open_pmu(config); + + spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0); + igt_spin_batch_set_timeout(spin, batch_duration_ns); + + slept = measured_usleep(batch_duration_ns / 3000); + val[1] = pmu_read_single(fd[1]); + close(fd[1]); + + gem_sync(gem_fd, spin->handle); + + val[0] = pmu_read_single(fd[0]); + + igt_spin_batch_free(gem_fd, spin); + close(fd[0]); + + assert_within_epsilon(val[0], batch_duration_ns, tolerance); + assert_within_epsilon(val[1], slept, tolerance); +} + +/** + * Tests that i915 PMU correctly errors out in invalid initialization. 
+ * i915 PMU is uncore PMU, thus: + * - sampling period is not supported + * - pid > 0 is not supported since we can't count per-process (we count + * per whole system) + * - cpu != 0 is not supported since i915 PMU exposes cpumask for CPU0 + */ +static void invalid_init(void) +{ + struct perf_event_attr attr; + int pid, cpu; + +#define ATTR_INIT() \ +do { \ + memset(&attr, 0, sizeof (attr)); \ + attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \ + attr.type = i915_type_id(); \ + igt_assert(attr.type != 0); \ +} while(0) + + ATTR_INIT(); + attr.sample_period = 100; + pid = -1; + cpu = 0; + igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1); + igt_assert_eq(errno, EINVAL); + + ATTR_INIT(); + pid = 0; + cpu = 0; + igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1); + igt_assert_eq(errno, EINVAL); + + ATTR_INIT(); + pid = -1; + cpu = 1; + igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1); + igt_assert_eq(errno, ENODEV); +} + +static void init_other(unsigned int i, bool valid) +{ + int fd; + + fd = perf_i915_open(__I915_PMU_OTHER(i)); + igt_require(!(fd < 0 && errno == ENODEV)); + if (valid) { + igt_assert(fd >= 0); + } else { + igt_assert(fd < 0); + return; + } + + close(fd); +} + +static void read_other(unsigned int i, bool valid) +{ + int fd; + + fd = perf_i915_open(__I915_PMU_OTHER(i)); + igt_require(!(fd < 0 && errno == ENODEV)); + if (valid) { + igt_assert(fd >= 0); + } else { + igt_assert(fd < 0); + return; + } + + (void)pmu_read_single(fd); + + close(fd); +} + +static bool cpu0_hotplug_support(void) +{ + return access("/sys/devices/system/cpu/cpu0/online", W_OK) == 0; +} + +static void cpu_hotplug(int gem_fd) +{ + struct timespec start = { }; + igt_spin_t *spin; + uint64_t val, ref; + int fd; + + igt_require(cpu0_hotplug_support()); + + fd = perf_i915_open(I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0)); + igt_assert(fd >= 0); + + spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0); + + igt_nsec_elapsed(&start); 
+ + /* + * Toggle online status of all the CPUs in a child process and ensure + * this has not affected busyness stats in the parent. + */ + igt_fork(child, 1) { + int cpu = 0; + + for (;;) { + char name[128]; + int cpufd; + + sprintf(name, "/sys/devices/system/cpu/cpu%d/online", + cpu); + cpufd = open(name, O_WRONLY); + if (cpufd == -1) { + igt_assert(cpu > 0); + break; + } + igt_assert_eq(write(cpufd, "0", 2), 2); + + usleep(1e6); + + igt_assert_eq(write(cpufd, "1", 2), 2); + + close(cpufd); + cpu++; + } + } + + igt_waitchildren(); + + igt_spin_batch_end(spin); + gem_sync(gem_fd, spin->handle); + + ref = igt_nsec_elapsed(&start); + val = pmu_read_single(fd); + + igt_spin_batch_free(gem_fd, spin); + close(fd); + + assert_within_epsilon(val, ref, tolerance); +} + +static unsigned long calibrate_nop(int fd, const unsigned int calibration_us) +{ + const unsigned int cal_min_us = calibration_us * 3; + const unsigned int tolerance_pct = 10; + const uint32_t bbe = MI_BATCH_BUFFER_END; + const unsigned int loops = 17; + struct drm_i915_gem_exec_object2 obj = {}; + struct drm_i915_gem_execbuffer2 eb = + { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj}; + struct timespec t_begin = { }; + long size, last_size; + unsigned long ns; + + igt_nsec_elapsed(&t_begin); + + size = 256 * 1024; + do { + struct timespec t_start = { }; + + obj.handle = gem_create(fd, size); + gem_write(fd, obj.handle, size - sizeof(bbe), &bbe, + sizeof(bbe)); + gem_execbuf(fd, &eb); + gem_sync(fd, obj.handle); + + igt_nsec_elapsed(&t_start); + + for (int loop = 0; loop < loops; loop++) + gem_execbuf(fd, &eb); + gem_sync(fd, obj.handle); + + ns = igt_nsec_elapsed(&t_start); + + gem_close(fd, obj.handle); + + last_size = size; + size = (uint64_t)calibration_us * 1000 * size * loops / ns; + size = ALIGN(size, sizeof(uint32_t)); + } while (igt_nsec_elapsed(&t_begin) / 1000 < cal_min_us || + labs(size - last_size) > (size * tolerance_pct / 100)); + + return size / sizeof(uint32_t); +} + +static void exec_nop(int 
gem_fd, unsigned long sz) +{ + struct drm_i915_gem_exec_object2 obj = {}; + struct drm_i915_gem_execbuffer2 eb = + { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj}; + const uint32_t bbe = MI_BATCH_BUFFER_END; + struct pollfd pfd; + int fence; + + sz = ALIGN(sz, sizeof(uint32_t)); + + obj.handle = gem_create(gem_fd, sz); + gem_write(gem_fd, obj.handle, sz - sizeof(bbe), &bbe, sizeof(bbe)); + + eb.flags = I915_EXEC_RENDER | I915_EXEC_FENCE_OUT; + + gem_execbuf_wr(gem_fd, &eb); + fence = eb.rsvd2 >> 32; + + /* + * Poll on the output fence to ensure user interrupts will be + * generated and listened to. + */ + pfd.fd = fence; + pfd.events = POLLIN; + igt_assert_eq(poll(&pfd, 1, -1), 1); + + close(fence); + gem_close(gem_fd, obj.handle); +} + +static void +test_interrupts(int gem_fd) +{ + const unsigned int calibration_us = 250000; + const unsigned int batch_len_us = 100000; + const unsigned int batch_count = 3e6 / batch_len_us; + uint64_t idle, busy, prev; + unsigned long cal, sz; + unsigned int i; + int fd; + + cal = calibrate_nop(gem_fd, calibration_us); + sz = batch_len_us * cal / calibration_us; + + fd = open_pmu(I915_PMU_INTERRUPTS); + + gem_quiescent_gpu(gem_fd); + + /* Wait for idle state. */ + prev = pmu_read_single(fd); + idle = prev + 1; + while (idle != prev) { + usleep(1e6); + prev = idle; + idle = pmu_read_single(fd); + } + + igt_assert_eq(idle - prev, 0); + + /* + * Send some no-op batches waiting on output fences to + * ensure interrupts. + */ + for (i = 0; i < batch_count; i++) + exec_nop(gem_fd, sz); + + /* Check at least as many interrupts has been generated. 
*/
+	busy = pmu_read_single(fd) - idle;
+	close(fd);
+
+	igt_assert(busy >= batch_count);
+}
+
+/*
+ * Check that the requested and actual frequency PMU counters follow the
+ * frequency limits programmed through sysfs.
+ *
+ * A spin batch is run on the render engine twice for duration_ns: once with
+ * the max and boost limits lowered to the gt_RPn_freq_mhz value, and once
+ * with the max/boost limits restored and the min limit raised to the
+ * gt_RP0_freq_mhz value. Both counters (opened as one group, requested
+ * first and actual second) must have advanced less during the first run
+ * than during the second.
+ *
+ * The igt_require() checks guard against sysfs being unavailable, against
+ * frequency ranges that leave no room between min and max/boost, and
+ * against limits that cannot be written and read back.
+ */
+static void
+test_frequency(int gem_fd)
+{
+	const uint64_t duration_ns = 2e9;
+	uint32_t min_freq, max_freq, boost_freq;
+	uint64_t min[2], max[2], start[2];
+	igt_spin_t *spin;
+	int fd, sysfs;
+
+	sysfs = igt_sysfs_open(gem_fd, NULL);
+	igt_require(sysfs >= 0);
+
+	min_freq = igt_sysfs_get_u32(sysfs, "gt_RPn_freq_mhz");
+	max_freq = igt_sysfs_get_u32(sysfs, "gt_RP0_freq_mhz");
+	boost_freq = igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz");
+	igt_require(min_freq > 0 && max_freq > 0 && boost_freq > 0);
+	igt_require(max_freq > min_freq);
+	igt_require(boost_freq > min_freq);
+
+	/* Group leader is the requested frequency, the actual one joins it. */
+	fd = open_group(I915_PMU_REQUESTED_FREQUENCY, -1);
+	open_group(I915_PMU_ACTUAL_FREQUENCY, fd);
+
+	/*
+	 * Set GPU to min frequency and read PMU counters.
+	 */
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_max_freq_mhz", min_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_max_freq_mhz") == min_freq);
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_boost_freq_mhz", min_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz") == min_freq);
+
+	pmu_read_multi(fd, 2, start);
+
+	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+	igt_spin_batch_set_timeout(spin, duration_ns);
+	gem_sync(gem_fd, spin->handle);
+
+	/* Keep only what was accumulated while the spinner was running. */
+	pmu_read_multi(fd, 2, min);
+	min[0] -= start[0];
+	min[1] -= start[1];
+
+	igt_spin_batch_free(gem_fd, spin);
+
+	usleep(1e6);
+
+	/*
+	 * Set GPU to max frequency and read PMU counters.
+	 */
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_max_freq_mhz", max_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_max_freq_mhz") == max_freq);
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_boost_freq_mhz", boost_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz") == boost_freq);
+
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_min_freq_mhz", max_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") == max_freq);
+
+	pmu_read_multi(fd, 2, start);
+
+	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+	igt_spin_batch_set_timeout(spin, duration_ns);
+	gem_sync(gem_fd, spin->handle);
+
+	pmu_read_multi(fd, 2, max);
+	max[0] -= start[0];
+	max[1] -= start[1];
+
+	igt_spin_batch_free(gem_fd, spin);
+
+	/*
+	 * Restore min/max.
+	 */
+	igt_sysfs_set_u32(sysfs, "gt_min_freq_mhz", min_freq);
+	if (igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") != min_freq)
+		igt_warn("Unable to restore min frequency to saved value [%u MHz], now %u MHz\n",
+			 min_freq, igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz"));
+	close(fd);
+	/* The sysfs directory handle is ours as well; do not leak it. */
+	close(sysfs);
+
+	/* Asserts come after the cleanup so a failure leaves nothing behind. */
+	igt_assert(min[0] < max[0]);
+	igt_assert(min[1] < max[1]);
+}
+
+/*
+ * Check RC6 residency reporting.
+ *
+ * After quiescing the GPU and waiting one second, the counter delta across
+ * a measured sleep is compared against the value measured_usleep() returns.
+ * Then, with a forcewake handle held open, the delta across a sleep of the
+ * same length is compared against zero. Both comparisons use the file-wide
+ * tolerance.
+ */
+static void
+test_rc6(int gem_fd)
+{
+	int64_t duration_ns = 2e9;
+	uint64_t idle, busy, prev;
+	unsigned int slept;
+	int fd, fw;
+
+	fd = open_pmu(I915_PMU_RC6_RESIDENCY);
+
+	gem_quiescent_gpu(gem_fd);
+	usleep(1e6);
+
+	/* Go idle and check full RC6. */
+	prev = pmu_read_single(fd);
+	slept = measured_usleep(duration_ns / 1000);
+	idle = pmu_read_single(fd);
+
+	assert_within_epsilon(idle - prev, slept, tolerance);
+
+	/* Wake up device and check no RC6. */
+	fw = igt_open_forcewake_handle(gem_fd);
+	igt_assert(fw >= 0);
+
+	prev = pmu_read_single(fd);
+	usleep(duration_ns / 1000);
+	busy = pmu_read_single(fd);
+
+	close(fw);
+	close(fd);
+
+	assert_within_epsilon(busy - prev, 0.0, tolerance);
+}
+
+/*
+ * Check RC6, RC6p and RC6pp residency reporting together.
+ *
+ * The three counters are opened as one group; the test is only run
+ * (igt_require) when all three could be opened. The idle and forcewake
+ * phases mirror test_rc6(), applied to every counter of the group.
+ */
+static void
+test_rc6p(int gem_fd)
+{
+	int64_t duration_ns = 2e9;
+	unsigned int num_pmu = 1;
+	uint64_t idle[3], busy[3], prev[3];
+	unsigned int slept, i;
+	int fd, ret, fw;
+
+	fd = open_group(I915_PMU_RC6_RESIDENCY, -1);
+	/* RC6pp is only attempted once RC6p has been opened. */
+	ret = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
+	if (ret > 0) {
+		num_pmu++;
+		ret = perf_i915_open_group(I915_PMU_RC6pp_RESIDENCY, fd);
+		if (ret > 0)
+			num_pmu++;
+	}
+
+	igt_require(num_pmu == 3);
+
+	gem_quiescent_gpu(gem_fd);
+	usleep(1e6);
+
+	/* Go idle and check full RC6. */
+	pmu_read_multi(fd, num_pmu, prev);
+	slept = measured_usleep(duration_ns / 1000);
+	pmu_read_multi(fd, num_pmu, idle);
+
+	for (i = 0; i < num_pmu; i++)
+		assert_within_epsilon(idle[i] - prev[i], slept, tolerance);
+
+	/* Wake up device and check no RC6. */
+	fw = igt_open_forcewake_handle(gem_fd);
+	igt_assert(fw >= 0);
+
+	pmu_read_multi(fd, num_pmu, prev);
+	usleep(duration_ns / 1000);
+	pmu_read_multi(fd, num_pmu, busy);
+
+	close(fw);
+	close(fd);
+
+	for (i = 0; i < num_pmu; i++)
+		assert_within_epsilon(busy[i] - prev[i], 0.0, tolerance);
+}
+
+/*
+ * Test entry point: opens the i915 device as master, counts the engines
+ * present, and registers the per-engine and global PMU subtests.
+ */
+igt_main
+{
+	const unsigned int num_other_metrics =
+				I915_PMU_LAST - __I915_PMU_OTHER(0) + 1;
+	unsigned int num_engines = 0;
+	int fd = -1;
+	const struct intel_execution_engine2 *e;
+	unsigned int i;
+
+	igt_fixture {
+		fd = drm_open_driver_master(DRIVER_INTEL);
+
+		igt_require_gem(fd);
+		igt_require(i915_type_id() > 0);
+
+		/* Only engines actually present count towards num_engines. */
+		for_each_engine_class_instance(fd, e) {
+			if (gem_has_engine(fd, e->class, e->instance))
+				num_engines++;
+		}
+	}
+
+	/**
+	 * Test invalid access via perf API is rejected.
+	 */
+	igt_subtest("invalid-init")
+		invalid_init();
+
+	for_each_engine_class_instance(fd, e) {
+		/**
+		 * Test that a single engine metric can be initialized.
+		 */
+		igt_subtest_f("init-busy-%s", e->name)
+			init(fd, e, I915_SAMPLE_BUSY);
+
+		igt_subtest_f("init-wait-%s", e->name)
+			init(fd, e, I915_SAMPLE_WAIT);
+
+		igt_subtest_f("init-sema-%s", e->name)
+			init(fd, e, I915_SAMPLE_SEMA);
+
+		/**
+		 * Test that engines show no load when idle.
+		 */
+		igt_subtest_f("idle-%s", e->name)
+			single(fd, e, false);
+
+		/**
+		 * Test that a single engine reports load correctly.
+		 */
+		igt_subtest_f("busy-%s", e->name)
+			single(fd, e, true);
+
+		/**
+		 * Test that when one engine is loaded the others report no
+		 * load.
+		 */
+		igt_subtest_f("busy-check-all-%s", e->name)
+			busy_check_all(fd, e, num_engines);
+
+		/**
+		 * Test that when all except one engine are loaded all loads
+		 * are correctly reported.
+		 */
+		igt_subtest_f("most-busy-check-all-%s", e->name)
+			most_busy_check_all(fd, e, num_engines);
+
+		/**
+		 * Test that semaphore counters report no activity on idle
+		 * or busy engines.
+		 */
+		igt_subtest_f("idle-no-semaphores-%s", e->name)
+			no_sema(fd, e, false);
+
+		igt_subtest_f("busy-no-semaphores-%s", e->name)
+			no_sema(fd, e, true);
+
+		/**
+		 * Test that semaphore waits are correctly reported.
+		 */
+		igt_subtest_f("semaphore-wait-%s", e->name)
+			sema_wait(fd, e);
+
+		/**
+		 * Test that event waits are correctly reported.
+		 */
+		if (e->class == I915_ENGINE_CLASS_RENDER)
+			igt_subtest_f("event-wait-%s", e->name)
+				event_wait(fd, e);
+
+		/**
+		 * Check that two perf clients do not influence each other's
+		 * observations.
+		 */
+		igt_subtest_f("multi-client-%s", e->name)
+			multi_client(fd, e);
+	}
+
+	/**
+	 * Test that when all engines are loaded all loads are
+	 * correctly reported.
+	 */
+	igt_subtest("all-busy-check-all")
+		all_busy_check_all(fd, num_engines);
+
+	/**
+	 * Test that non-engine counters can be initialized and read. Apart
+	 * from the invalid metric which should fail.
+	 */
+	for (i = 0; i < num_other_metrics + 1; i++) {
+		/* The last index is one past the valid metrics. */
+		igt_subtest_f("other-init-%u", i)
+			init_other(i, i < num_other_metrics);
+
+		igt_subtest_f("other-read-%u", i)
+			read_other(i, i < num_other_metrics);
+	}
+
+	/**
+	 * Test counters are not affected by CPU offline/online events.
+	 */
+	igt_subtest("cpu-hotplug")
+		cpu_hotplug(fd);
+
+	/**
+	 * Test GPU frequency.
+	 */
+	igt_subtest("frequency")
+		test_frequency(fd);
+
+	/**
+	 * Test interrupt count reporting.
+	 */
+	igt_subtest("interrupts")
+		test_interrupts(fd);
+
+	/**
+	 * Test RC6 residency reporting.
+	 */
+	igt_subtest("rc6")
+		test_rc6(fd);
+
+	/**
+	 * Test RC6p residency reporting.
+	 */
+	igt_subtest("rc6p")
+		test_rc6p(fd);
+
+	/**
+	 * Check render nodes are counted.
+	 */
+	igt_subtest_group {
+		int render_fd;
+
+		igt_fixture {
+			render_fd = drm_open_driver_render(DRIVER_INTEL);
+			igt_require_gem(render_fd);
+
+			gem_quiescent_gpu(fd);
+		}
+
+		/*
+		 * Enumerate with fd: render_fd is only assigned inside the
+		 * fixture above.
+		 */
+		for_each_engine_class_instance(fd, e) {
+			/*
+			 * Submit through the render node so that work queued
+			 * via that node is what the PMU is checked against.
+			 */
+			igt_subtest_f("render-node-busy-%s", e->name)
+				single(render_fd, e, true);
+		}
+
+		igt_fixture {
+			close(render_fd);
+		}
+	}
+}
-- 
cgit v1.2.3