summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/igt_perf.c6
-rw-r--r--lib/igt_perf.h1
-rw-r--r--man/intel_gpu_top.rst41
-rw-r--r--tools/Makefile.am2
-rw-r--r--tools/intel_gpu_top.c1250
-rw-r--r--tools/meson.build6
6 files changed, 719 insertions, 587 deletions
diff --git a/lib/igt_perf.c b/lib/igt_perf.c
index 99d82ea5..e3dec2cc 100644
--- a/lib/igt_perf.c
+++ b/lib/igt_perf.c
@@ -69,3 +69,9 @@ int igt_perf_open(uint64_t type, uint64_t config)
return _perf_open(type, config, -1,
PERF_FORMAT_TOTAL_TIME_ENABLED);
}
+
+int igt_perf_open_group(uint64_t type, uint64_t config, int group)
+{
+ return _perf_open(type, config, group,
+ PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP);
+}
diff --git a/lib/igt_perf.h b/lib/igt_perf.h
index 614ea5d2..e00718f4 100644
--- a/lib/igt_perf.h
+++ b/lib/igt_perf.h
@@ -55,5 +55,6 @@ uint64_t i915_type_id(void);
int perf_i915_open(uint64_t config);
int perf_i915_open_group(uint64_t config, int group);
int igt_perf_open(uint64_t type, uint64_t config);
+int igt_perf_open_group(uint64_t type, uint64_t config, int group);
#endif /* I915_PERF_H */
diff --git a/man/intel_gpu_top.rst b/man/intel_gpu_top.rst
index a5f7175b..19c71230 100644
--- a/man/intel_gpu_top.rst
+++ b/man/intel_gpu_top.rst
@@ -7,9 +7,9 @@ Display a top-like summary of Intel GPU usage
---------------------------------------------
.. include:: defs.rst
:Author: IGT Developers <igt-dev@lists.freedesktop.org>
-:Date: 2016-03-01
+:Date: 2018-04-04
:Version: |PACKAGE_STRING|
-:Copyright: 2009,2011,2012,2016 Intel Corporation
+:Copyright: 2009,2011,2012,2016,2018 Intel Corporation
:Manual section: |MANUAL_SECTION|
:Manual group: |MANUAL_GROUP|
@@ -21,42 +21,25 @@ SYNOPSIS
DESCRIPTION
===========
-**intel_gpu_top** is a tool to display usage information of an Intel GPU. It
-requires root privilege to map the graphics device.
+**intel_gpu_top** is a tool to display usage information on Intel GPU's.
+
+The tool gathers data using perf performance counters (PMU) exposed by i915 and other platform drivers like RAPL (power) and Uncore IMC (memory bandwidth).
OPTIONS
=======
--s SAMPLES
- Number of samples to acquire per second.
-
--o FILE
- Collect usage statistics to FILE. If file is "-", run non-interactively
- and output statistics to stdout.
-
--e COMMAND
- Execute COMMAND to profile, and leave when it is finished. Note that the
- entire command with all parameters should be included as one parameter.
+-s <ms>
+ Refresh period in milliseconds.
-h
- Show usage notes.
+ Show help text.
-EXAMPLES
-========
-
-intel_gpu_top -o "cairo-trace-gvim.log" -s 100 -e "cairo-perf-trace /tmp/gvim"
- Run cairo-perf-trace with /tmp/gvim trace, non-interactively, saving the
- statistics into cairo-trace-gvim.log file, and collecting 100 samples per
- second.
-
-Note that idle units are not displayed, so an entirely idle GPU will only
-display the ring status and header.
+LIMITATIONS
+===========
-BUGS
-====
+* Not all metrics are supported on all platforms. Where a metric is unsupported it's value will be replaced by a dashed line.
-Some GPUs report some units as busy when they aren't, such that even when idle
-and not hung, it will show up as 100% busy.
+* Non-root access to perf counters is controlled by the *perf_event_paranoid* sysctl.
REPORTING BUGS
==============
diff --git a/tools/Makefile.am b/tools/Makefile.am
index 09b6dbcc..a0b016dd 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -28,6 +28,8 @@ intel_aubdump_la_LDFLAGS = -module -avoid-version -no-undefined
intel_aubdump_la_SOURCES = aubdump.c
intel_aubdump_la_LIBADD = $(top_builddir)/lib/libintel_tools.la -ldl
+intel_gpu_top_LDADD = $(top_builddir)/lib/libigt_perf.la
+
bin_SCRIPTS = intel_aubdump
CLEANFILES = $(bin_SCRIPTS)
diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c
index 098e6ce3..b923c3cf 100644
--- a/tools/intel_gpu_top.c
+++ b/tools/intel_gpu_top.c
@@ -1,6 +1,5 @@
/*
- * Copyright © 2007 Intel Corporation
- * Copyright © 2011 Intel Corporation
+ * Copyright © 2007-2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,695 +23,832 @@
* Authors:
* Eric Anholt <eric@anholt.net>
* Eugeni Dodonov <eugeni.dodonov@intel.com>
- *
*/
-#include "config.h"
-
-#include <inttypes.h>
-#include <unistd.h>
-#include <stdlib.h>
#include <stdio.h>
-#include <err.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-#include <sys/wait.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <stdint.h>
+#include <assert.h>
#include <string.h>
-#ifdef HAVE_TERMIOS_H
-#include <termios.h>
-#endif
-#include "intel_io.h"
-#include "instdone.h"
-#include "intel_reg.h"
-#include "intel_chipset.h"
-#include "drmtest.h"
-
-#define FORCEWAKE 0xA18C
-#define FORCEWAKE_ACK 0x130090
-
-#define SAMPLES_PER_SEC 10000
-#define SAMPLES_TO_PERCENT_RATIO (SAMPLES_PER_SEC / 100)
-
-#define MAX_NUM_TOP_BITS 100
-
-#define HAS_STATS_REGS(devid) IS_965(devid)
-
-struct top_bit {
- struct instdone_bit *bit;
- int count;
-} top_bits[MAX_NUM_TOP_BITS];
-struct top_bit *top_bits_sorted[MAX_NUM_TOP_BITS];
-
-static uint32_t instdone, instdone1;
-
-static const char *bars[] = {
- " ",
- "▏",
- "▎",
- "▍",
- "▌",
- "▋",
- "▊",
- "▉",
- "█"
-};
+#include <ctype.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <math.h>
+#include <locale.h>
+
+#include "igt_perf.h"
-enum stats_counts {
- IA_VERTICES,
- IA_PRIMITIVES,
- VS_INVOCATION,
- GS_INVOCATION,
- GS_PRIMITIVES,
- CL_INVOCATION,
- CL_PRIMITIVES,
- PS_INVOCATION,
- PS_DEPTH,
- STATS_COUNT
+struct pmu_pair {
+ uint64_t cur;
+ uint64_t prev;
};
-const uint32_t stats_regs[STATS_COUNT] = {
- IA_VERTICES_COUNT_QW,
- IA_PRIMITIVES_COUNT_QW,
- VS_INVOCATION_COUNT_QW,
- GS_INVOCATION_COUNT_QW,
- GS_PRIMITIVES_COUNT_QW,
- CL_INVOCATION_COUNT_QW,
- CL_PRIMITIVES_COUNT_QW,
- PS_INVOCATION_COUNT_QW,
- PS_DEPTH_COUNT_QW,
+struct pmu_counter {
+ bool present;
+ uint64_t config;
+ unsigned int idx;
+ struct pmu_pair val;
};
-const char *stats_reg_names[STATS_COUNT] = {
- "vert fetch",
- "prim fetch",
- "VS invocations",
- "GS invocations",
- "GS prims",
- "CL invocations",
- "CL prims",
- "PS invocations",
- "PS depth pass",
+struct engine {
+ const char *name;
+ const char *display_name;
+
+ unsigned int class;
+ unsigned int instance;
+
+ unsigned int num_counters;
+
+ struct pmu_counter busy;
+ struct pmu_counter wait;
+ struct pmu_counter sema;
};
-uint64_t stats[STATS_COUNT];
-uint64_t last_stats[STATS_COUNT];
+struct engines {
+ unsigned int num_engines;
+ unsigned int num_counters;
+ DIR *root;
+ int fd;
+ struct pmu_pair ts;
+
+ int rapl_fd;
+ double rapl_scale;
+ const char *rapl_unit;
+
+ int imc_fd;
+ double imc_reads_scale;
+ const char *imc_reads_unit;
+ double imc_writes_scale;
+ const char *imc_writes_unit;
+
+ struct pmu_counter freq_req;
+ struct pmu_counter freq_act;
+ struct pmu_counter irq;
+ struct pmu_counter rc6;
+ struct pmu_counter rapl;
+ struct pmu_counter imc_reads;
+ struct pmu_counter imc_writes;
+
+ struct engine engine;
+};
-static unsigned long
-gettime(void)
+static uint64_t
+get_pmu_config(int dirfd, const char *name, const char *counter)
{
- struct timeval t;
- gettimeofday(&t, NULL);
- return (t.tv_usec + (t.tv_sec * 1000000));
-}
+ char buf[128], *p;
+ int fd, ret;
-static int
-top_bits_sort(const void *a, const void *b)
-{
- struct top_bit * const *bit_a = a;
- struct top_bit * const *bit_b = b;
- int a_count = (*bit_a)->count;
- int b_count = (*bit_b)->count;
+ ret = snprintf(buf, sizeof(buf), "%s-%s", name, counter);
+ if (ret < 0 || ret == sizeof(buf))
+ return -1;
- if (a_count < b_count)
- return 1;
- else if (a_count == b_count)
- return 0;
- else
+ fd = openat(dirfd, buf, O_RDONLY);
+ if (fd < 0)
return -1;
-}
-static void
-update_idle_bit(struct top_bit *top_bit)
-{
- uint32_t reg_val;
+ ret = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (ret <= 0)
+ return -1;
- if (top_bit->bit->reg == INSTDONE_1)
- reg_val = instdone1;
- else
- reg_val = instdone;
+ p = index(buf, '0');
+ if (!p)
+ return -1;
- if ((reg_val & top_bit->bit->bit) == 0)
- top_bit->count++;
+ return strtoul(p, NULL, 0);
}
-static void
-print_clock(const char *name, int clock) {
- if (clock == -1)
- printf("%s clock: unknown", name);
+#define engine_ptr(engines, n) (&engines->engine + (n))
+
+static const char *class_display_name(unsigned int class)
+{
+ switch (class) {
+ case I915_ENGINE_CLASS_RENDER:
+ return "Render/3D";
+ case I915_ENGINE_CLASS_COPY:
+ return "Blitter";
+ case I915_ENGINE_CLASS_VIDEO:
+ return "Video";
+ case I915_ENGINE_CLASS_VIDEO_ENHANCE:
+ return "VideoEnhance";
+ default:
+ return "[unknown]";
+ }
+}
+
+static int engine_cmp(const void *__a, const void *__b)
+{
+ const struct engine *a = (struct engine *)__a;
+ const struct engine *b = (struct engine *)__b;
+
+ if (a->class != b->class)
+ return a->class - b->class;
else
- printf("%s clock: %d Mhz", name, clock);
+ return a->instance - b->instance;
}
-static int
-print_clock_info(struct pci_device *pci_dev)
+static struct engines *discover_engines(void)
{
- uint32_t devid = pci_dev->device_id;
- uint16_t gcfgc;
+ const char *sysfs_root = "/sys/devices/i915/events";
+ struct engines *engines;
+ struct dirent *dent;
+ int ret = 0;
+ DIR *d;
- if (IS_GM45(devid)) {
- int core_clock = -1;
+ engines = malloc(sizeof(struct engines));
+ if (!engines)
+ return NULL;
- pci_device_cfg_read_u16(pci_dev, &gcfgc, I915_GCFGC);
+ memset(engines, 0, sizeof(*engines));
- switch (gcfgc & 0xf) {
- case 8:
- core_clock = 266;
- break;
- case 9:
- core_clock = 320;
- break;
- case 11:
- core_clock = 400;
- break;
- case 13:
- core_clock = 533;
- break;
- }
- print_clock("core", core_clock);
- } else if (IS_965(devid) && IS_MOBILE(devid)) {
- int render_clock = -1, sampler_clock = -1;
+ engines->num_engines = 0;
- pci_device_cfg_read_u16(pci_dev, &gcfgc, I915_GCFGC);
+ d = opendir(sysfs_root);
+ if (!d)
+ return NULL;
- switch (gcfgc & 0xf) {
- case 2:
- render_clock = 250; sampler_clock = 267;
- break;
- case 3:
- render_clock = 320; sampler_clock = 333;
- break;
- case 4:
- render_clock = 400; sampler_clock = 444;
- break;
- case 5:
- render_clock = 500; sampler_clock = 533;
+ while ((dent = readdir(d)) != NULL) {
+ const char *endswith = "-busy";
+ const unsigned int endlen = strlen(endswith);
+ struct engine *engine =
+ engine_ptr(engines, engines->num_engines);
+ char buf[256];
+
+ if (dent->d_type != DT_REG)
+ continue;
+
+ if (strlen(dent->d_name) >= sizeof(buf)) {
+ ret = ENAMETOOLONG;
break;
}
- print_clock("render", render_clock);
- printf(" ");
- print_clock("sampler", sampler_clock);
- } else if (IS_945(devid) && IS_MOBILE(devid)) {
- int render_clock = -1, display_clock = -1;
+ strcpy(buf, dent->d_name);
- pci_device_cfg_read_u16(pci_dev, &gcfgc, I915_GCFGC);
+ /* xxxN-busy */
+ if (strlen(buf) < (endlen + 4))
+ continue;
+ if (strcmp(&buf[strlen(buf) - endlen], endswith))
+ continue;
- switch (gcfgc & 0x7) {
- case 0:
- render_clock = 166;
- break;
- case 1:
- render_clock = 200;
- break;
- case 3:
- render_clock = 250;
- break;
- case 5:
- render_clock = 400;
+ memset(engine, 0, sizeof(*engine));
+
+ buf[strlen(buf) - endlen] = 0;
+ engine->name = strdup(buf);
+ if (!engine->name) {
+ ret = errno;
break;
}
- switch (gcfgc & 0x70) {
- case 0:
- display_clock = 200;
- break;
- case 4:
- display_clock = 320;
+ engine->busy.config = get_pmu_config(dirfd(d), engine->name,
+ "busy");
+ if (engine->busy.config == -1) {
+ ret = ENOENT;
break;
}
- if (gcfgc & (1 << 7))
- display_clock = 133;
- print_clock("render", render_clock);
- printf(" ");
- print_clock("display", display_clock);
- } else if (IS_915(devid) && IS_MOBILE(devid)) {
- int render_clock = -1, display_clock = -1;
+ engine->class = (engine->busy.config &
+ (__I915_PMU_OTHER(0) - 1)) >>
+ I915_PMU_CLASS_SHIFT;
- pci_device_cfg_read_u16(pci_dev, &gcfgc, I915_GCFGC);
+ engine->instance = (engine->busy.config >>
+ I915_PMU_SAMPLE_BITS) &
+ ((1 << I915_PMU_SAMPLE_INSTANCE_BITS) - 1);
- switch (gcfgc & 0x7) {
- case 0:
- render_clock = 160;
- break;
- case 1:
- render_clock = 190;
- break;
- case 4:
- render_clock = 333;
+ ret = snprintf(buf, sizeof(buf), "%s/%u",
+ class_display_name(engine->class),
+ engine->instance);
+ if (ret < 0 || ret == sizeof(buf)) {
+ ret = ENOBUFS;
break;
}
- if (gcfgc & (1 << 13))
- render_clock = 133;
+ ret = 0;
- switch (gcfgc & 0x70) {
- case 0:
- display_clock = 190;
+ engine->display_name = strdup(buf);
+ if (!engine->display_name) {
+ ret = errno;
break;
- case 4:
- display_clock = 333;
+ }
+
+ engines->num_engines++;
+ engines = realloc(engines, sizeof(struct engines) +
+ engines->num_engines * sizeof(struct engine));
+ if (!engines) {
+ ret = errno;
break;
}
- if (gcfgc & (1 << 7))
- display_clock = 133;
+ }
+
+ if (ret) {
+ free(engines);
+ errno = ret;
- print_clock("render", render_clock);
- printf(" ");
- print_clock("display", display_clock);
+ return NULL;
}
+ qsort(engine_ptr(engines, 0), engines->num_engines,
+ sizeof(struct engine), engine_cmp);
+
+ engines->root = d;
- printf("\n");
- return -1;
+ return engines;
}
-#define STATS_LEN (20)
-#define PERCENTAGE_BAR_END (79 - STATS_LEN)
+static int
+filename_to_buf(const char *filename, char *buf, unsigned int bufsize)
+{
+ int fd, err;
+ ssize_t ret;
-static void
-print_percentage_bar(float percent, int cur_line_len)
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ ret = read(fd, buf, bufsize - 1);
+ err = errno;
+ close(fd);
+ if (ret < 1) {
+ errno = ret < 0 ? err : ENOMSG;
+
+ return -1;
+ }
+
+ if (ret > 1 && buf[ret - 1] == '\n')
+ buf[ret - 1] = '\0';
+ else
+ buf[ret] = '\0';
+
+ return 0;
+}
+
+static uint64_t filename_to_u64(const char *filename, int base)
{
- int bar_avail_len = (PERCENTAGE_BAR_END - cur_line_len - 1) * 8;
- int bar_len = bar_avail_len * (percent + .5) / 100.0;
- int i;
+ char buf[64], *b;
- for (i = bar_len; i >= 8; i -= 8) {
- printf("%s", bars[8]);
- cur_line_len++;
+ if (filename_to_buf(filename, buf, sizeof(buf)))
+ return 0;
+
+ /*
+ * Handle both single integer and key=value formats by skipping
+ * leading non-digits.
+ */
+ b = buf;
+ while (*b && !isdigit(*b))
+ b++;
+
+ return strtoull(b, NULL, base);
+}
+
+static double filename_to_double(const char *filename)
+{
+ char *oldlocale;
+ char buf[80];
+ double v;
+
+ if (filename_to_buf(filename, buf, sizeof(buf)))
+ return 0;
+
+ oldlocale = setlocale(LC_ALL, "C");
+ v = strtod(buf, NULL);
+ setlocale(LC_ALL, oldlocale);
+
+ return v;
+}
+
+#define RAPL_ROOT "/sys/devices/power/"
+#define RAPL_EVENT "/sys/devices/power/events/"
+
+static uint64_t rapl_type_id(void)
+{
+ return filename_to_u64(RAPL_ROOT "type", 10);
+}
+
+static uint64_t rapl_gpu_power(void)
+{
+ return filename_to_u64(RAPL_EVENT "energy-gpu", 0);
+}
+
+static double rapl_gpu_power_scale(void)
+{
+ return filename_to_double(RAPL_EVENT "energy-gpu.scale");
+}
+
+static const char *rapl_gpu_power_unit(void)
+{
+ char buf[32];
+
+ if (filename_to_buf(RAPL_EVENT "energy-gpu.unit",
+ buf, sizeof(buf)) == 0)
+ if (!strcmp(buf, "Joules"))
+ return strdup("Watts");
+ else
+ return strdup(buf);
+ else
+ return NULL;
+}
+
+#define IMC_ROOT "/sys/devices/uncore_imc/"
+#define IMC_EVENT "/sys/devices/uncore_imc/events/"
+
+static uint64_t imc_type_id(void)
+{
+ return filename_to_u64(IMC_ROOT "type", 10);
+}
+
+static uint64_t imc_data_reads(void)
+{
+ return filename_to_u64(IMC_EVENT "data_reads", 0);
+}
+
+static double imc_data_reads_scale(void)
+{
+ return filename_to_double(IMC_EVENT "data_reads.scale");
+}
+
+static const char *imc_data_reads_unit(void)
+{
+ char buf[32];
+
+ if (filename_to_buf(IMC_EVENT "data_reads.unit", buf, sizeof(buf)) == 0)
+ return strdup(buf);
+ else
+ return NULL;
+}
+
+static uint64_t imc_data_writes(void)
+{
+ return filename_to_u64(IMC_EVENT "data_writes", 0);
+}
+
+static double imc_data_writes_scale(void)
+{
+ return filename_to_double(IMC_EVENT "data_writes.scale");
+}
+
+static const char *imc_data_writes_unit(void)
+{
+ char buf[32];
+
+ if (filename_to_buf(IMC_EVENT "data_writes.unit",
+ buf, sizeof(buf)) == 0)
+ return strdup(buf);
+ else
+ return NULL;
+}
+
+#define _open_pmu(cnt, pmu, fd) \
+({ \
+ int fd__; \
+\
+ fd__ = perf_i915_open_group((pmu)->config, (fd)); \
+ if (fd__ >= 0) { \
+ if ((fd) == -1) \
+ (fd) = fd__; \
+ (pmu)->present = true; \
+ (pmu)->idx = (cnt)++; \
+ } \
+\
+ fd__; \
+})
+
+#define _open_imc(cnt, pmu, fd) \
+({ \
+ int fd__; \
+\
+ fd__ = igt_perf_open_group(imc_type_id(), (pmu)->config, (fd)); \
+ if (fd__ >= 0) { \
+ if ((fd) == -1) \
+ (fd) = fd__; \
+ (pmu)->present = true; \
+ (pmu)->idx = (cnt)++; \
+ } \
+\
+ fd__; \
+})
+
+static int pmu_init(struct engines *engines)
+{
+ unsigned int i;
+ int fd;
+
+ engines->fd = -1;
+ engines->num_counters = 0;
+
+ engines->irq.config = I915_PMU_INTERRUPTS;
+ fd = _open_pmu(engines->num_counters, &engines->irq, engines->fd);
+ if (fd < 0)
+ return -1;
+
+ engines->freq_req.config = I915_PMU_REQUESTED_FREQUENCY;
+ _open_pmu(engines->num_counters, &engines->freq_req, engines->fd);
+
+ engines->freq_act.config = I915_PMU_ACTUAL_FREQUENCY;
+ _open_pmu(engines->num_counters, &engines->freq_act, engines->fd);
+
+ engines->rc6.config = I915_PMU_RC6_RESIDENCY;
+ _open_pmu(engines->num_counters, &engines->rc6, engines->fd);
+
+ for (i = 0; i < engines->num_engines; i++) {
+ struct engine *engine = engine_ptr(engines, i);
+ struct {
+ struct pmu_counter *pmu;
+ const char *counter;
+ } *cnt, counters[] = {
+ { .pmu = &engine->busy, .counter = "busy" },
+ { .pmu = &engine->wait, .counter = "wait" },
+ { .pmu = &engine->sema, .counter = "sema" },
+ { .pmu = NULL, .counter = NULL },
+ };
+
+ for (cnt = counters; cnt->pmu; cnt++) {
+ if (!cnt->pmu->config)
+ cnt->pmu->config =
+ get_pmu_config(dirfd(engines->root),
+ engine->name,
+ cnt->counter);
+ fd = _open_pmu(engines->num_counters, cnt->pmu,
+ engines->fd);
+ if (fd >= 0)
+ engine->num_counters++;
+ }
}
- if (i) {
- printf("%s", bars[i]);
- cur_line_len++;
+
+ engines->rapl_fd = -1;
+ if (rapl_type_id()) {
+ engines->rapl_scale = rapl_gpu_power_scale();
+ engines->rapl_unit = rapl_gpu_power_unit();
+ if (!engines->rapl_unit)
+ return -1;
+
+ engines->rapl.config = rapl_gpu_power();
+ if (!engines->rapl.config)
+ return -1;
+
+ engines->rapl_fd = igt_perf_open(rapl_type_id(),
+ engines->rapl.config);
+ if (engines->rapl_fd < 0)
+ return -1;
+
+ engines->rapl.present = true;
}
- /* NB: We can't use a field width with utf8 so we manually
- * guarantee a field with of 45 chars for any bar. */
- printf("%*s", PERCENTAGE_BAR_END - cur_line_len, "");
-}
+ engines->imc_fd = -1;
+ if (imc_type_id()) {
+ unsigned int num = 0;
-struct ring {
- const char *name;
- uint32_t mmio;
- int head, tail, size;
- uint64_t full;
- int idle;
-};
+ engines->imc_reads_scale = imc_data_reads_scale();
+ engines->imc_writes_scale = imc_data_writes_scale();
+
+ engines->imc_reads_unit = imc_data_reads_unit();
+ if (!engines->imc_reads_unit)
+ return -1;
+
+ engines->imc_writes_unit = imc_data_writes_unit();
+ if (!engines->imc_writes_unit)
+ return -1;
+
+ engines->imc_reads.config = imc_data_reads();
+ if (!engines->imc_reads.config)
+ return -1;
+
+ engines->imc_writes.config = imc_data_writes();
+ if (!engines->imc_writes.config)
+ return -1;
+
+ fd = _open_imc(num, &engines->imc_reads, engines->imc_fd);
+ if (fd < 0)
+ return -1;
+ fd = _open_imc(num, &engines->imc_writes, engines->imc_fd);
+ if (fd < 0)
+ return -1;
+
+ engines->imc_reads.present = true;
+ engines->imc_writes.present = true;
+ }
+
+ return 0;
+}
-static uint32_t ring_read(struct ring *ring, uint32_t reg)
+static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
{
- return INREG(ring->mmio + reg);
+ uint64_t buf[2 + num];
+ unsigned int i;
+ ssize_t len;
+
+ memset(buf, 0, sizeof(buf));
+
+ len = read(fd, buf, sizeof(buf));
+ assert(len == sizeof(buf));
+
+ for (i = 0; i < num; i++)
+ val[i] = buf[2 + i];
+
+ return buf[1];
}
-static void ring_init(struct ring *ring)
+static double __pmu_calc(struct pmu_pair *p, double d, double t, double s)
{
- ring->size = (((ring_read(ring, RING_LEN) & RING_NR_PAGES) >> 12) + 1) * 4096;
+ double v;
+
+ v = p->cur - p->prev;
+ v /= d;
+ v /= t;
+ v *= s;
+
+ if (s == 100.0 && v > 100.0)
+ v = 100.0;
+
+ return v;
}
-static void ring_reset(struct ring *ring)
+static void fill_str(char *buf, unsigned int bufsz, char c, unsigned int num)
{
- ring->idle = ring->full = 0;
+ unsigned int i;
+
+ for (i = 0; i < num && i < (bufsz - 1); i++)
+ *buf++ = c;
+
+ *buf = 0;
}
-static void ring_sample(struct ring *ring)
+static void pmu_calc(struct pmu_counter *cnt,
+ char *buf, unsigned int bufsz,
+ unsigned int width, unsigned width_dec,
+ double d, double t, double s)
{
- int full;
+ double val;
+ int len;
+
+ assert(bufsz >= (width + width_dec + 1));
+
+ if (!cnt->present) {
+ fill_str(buf, bufsz, '-', width + width_dec);
+ return;
+ }
- if (!ring->size)
+ val = __pmu_calc(&cnt->val, d, t, s);
+
+ len = snprintf(buf, bufsz, "%*.*f", width + width_dec, width_dec, val);
+ if (len < 0 || len == bufsz) {
+ fill_str(buf, bufsz, 'X', width + width_dec);
return;
+ }
+}
+
+static uint64_t __pmu_read_single(int fd, uint64_t *ts)
+{
+ uint64_t data[2] = { };
+ ssize_t len;
- ring->head = ring_read(ring, RING_HEAD) & HEAD_ADDR;
- ring->tail = ring_read(ring, RING_TAIL) & TAIL_ADDR;
+ len = read(fd, data, sizeof(data));
+ assert(len == sizeof(data));
- if (ring->tail == ring->head)
- ring->idle++;
+ if (ts)
+ *ts = data[1];
- full = ring->tail - ring->head;
- if (full < 0)
- full += ring->size;
- ring->full += full;
+ return data[0];
}
-static void ring_print_header(FILE *out, struct ring *ring)
+static uint64_t pmu_read_single(int fd)
{
- fprintf(out, "%.6s%%\tops\t",
- ring->name
- );
+ return __pmu_read_single(fd, NULL);
}
-static void ring_print(struct ring *ring, unsigned long samples_per_sec)
+static void __update_sample(struct pmu_counter *counter, uint64_t val)
{
- int percent_busy, len;
+ counter->val.prev = counter->val.cur;
+ counter->val.cur = val;
+}
- if (!ring->size)
- return;
+static void update_sample(struct pmu_counter *counter, uint64_t *val)
+{
+ if (counter->present)
+ __update_sample(counter, val[counter->idx]);
+}
+
+static void pmu_sample(struct engines *engines)
+{
+ const int num_val = engines->num_counters;
+ uint64_t val[2 + num_val];
+ unsigned int i;
+
+ engines->ts.prev = engines->ts.cur;
+
+ if (engines->rapl_fd >= 0)
+ __update_sample(&engines->rapl,
+ pmu_read_single(engines->rapl_fd));
+
+ if (engines->imc_fd >= 0) {
+ pmu_read_multi(engines->imc_fd, 2, val);
+ update_sample(&engines->imc_reads, val);
+ update_sample(&engines->imc_writes, val);
+ }
- percent_busy = 100 - 100 * ring->idle / samples_per_sec;
+ engines->ts.cur = pmu_read_multi(engines->fd, num_val, val);
- len = printf("%25s busy: %3d%%: ", ring->name, percent_busy);
- print_percentage_bar (percent_busy, len);
- printf("%24s space: %d/%d\n",
- ring->name,
- (int)(ring->full / samples_per_sec),
- ring->size);
+ update_sample(&engines->freq_req, val);
+ update_sample(&engines->freq_act, val);
+ update_sample(&engines->irq, val);
+ update_sample(&engines->rc6, val);
+
+ for (i = 0; i < engines->num_engines; i++) {
+ struct engine *engine = engine_ptr(engines, i);
+
+ update_sample(&engine->busy, val);
+ update_sample(&engine->sema, val);
+ update_sample(&engine->wait, val);
+ }
}
-static void ring_log(struct ring *ring, unsigned long samples_per_sec,
- FILE *output)
+static const char *bars[] = { " ", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█" };
+
+static void
+print_percentage_bar(double percent, int max_len)
{
- if (ring->size)
- fprintf(output, "%3d\t%d\t",
- (int)(100 - 100 * ring->idle / samples_per_sec),
- (int)(ring->full / samples_per_sec));
- else
- fprintf(output, "-1\t-1\t");
+ int bar_len = percent * (8 * (max_len - 2)) / 100.0;
+ int i;
+
+ putchar('|');
+
+ for (i = bar_len; i >= 8; i -= 8)
+ printf("%s", bars[8]);
+ if (i)
+ printf("%s", bars[i]);
+
+ for (i = 0; i < (max_len - 2 - (bar_len + 7) / 8); i++)
+ putchar(' ');
+
+ putchar('|');
}
+#define DEFAULT_PERIOD_MS (1000)
+
static void
usage(const char *appname)
{
printf("intel_gpu_top - Display a top-like summary of Intel GPU usage\n"
- "\n"
- "usage: %s [parameters]\n"
- "\n"
- "The following parameters apply:\n"
- "[-s <samples>] samples per seconds (default %d)\n"
- "[-e <command>] command to profile\n"
- "[-o <file>] output statistics to file. If file is '-',"
- " run in batch mode and output statistics to stdio only \n"
- "[-h] show this help screen\n"
- "\n",
- appname,
- SAMPLES_PER_SEC
- );
- return;
+ "\n"
+ "Usage: %s [parameters]\n"
+ "\n"
+ "\tThe following parameters are optional:\n\n"
+ "\t[-s <ms>] Refresh period in milliseconds (default %ums).\n"
+ "\t[-h] Show this help text.\n"
+ "\n",
+ appname, DEFAULT_PERIOD_MS);
}
int main(int argc, char **argv)
{
- uint32_t devid;
- struct pci_device *pci_dev;
- struct ring render_ring = {
- .name = "render",
- .mmio = 0x2030,
- }, bsd_ring = {
- .name = "bitstream",
- .mmio = 0x4030,
- }, bsd6_ring = {
- .name = "bitstream",
- .mmio = 0x12030,
- }, blt_ring = {
- .name = "blitter",
- .mmio = 0x22030,
- };
- int i, ch;
- int samples_per_sec = SAMPLES_PER_SEC;
- FILE *output = NULL;
- double elapsed_time=0;
- int print_headers=1;
- pid_t child_pid=-1;
- int child_stat;
- char *cmd=NULL;
- int interactive=1;
-
- /* Parse options? */
- while ((ch = getopt(argc, argv, "s:o:e:h")) != -1) {
+ unsigned int period_us = DEFAULT_PERIOD_MS * 1000;
+ int con_w = -1, con_h = -1;
+ struct engines *engines;
+ unsigned int i;
+ int ret, ch;
+
+ /* Parse options */
+ while ((ch = getopt(argc, argv, "s:h")) != -1) {
switch (ch) {
- case 'e': cmd = strdup(optarg);
- break;
- case 's': samples_per_sec = atoi(optarg);
- if (samples_per_sec < 100) {
- fprintf(stderr, "Error: samples per second must be >= 100\n");
- exit(1);
- }
- break;
- case 'o':
- if (!strcmp(optarg, "-")) {
- /* Running in non-interactive mode */
- interactive = 0;
- output = stdout;
- }
- else
- output = fopen(optarg, "w");
- if (!output)
- {
- perror("fopen");
- exit(1);
- }
+ case 's':
+ period_us = atoi(optarg) * 1000;
break;
case 'h':
usage(argv[0]);
exit(0);
- break;
default:
- fprintf(stderr, "Invalid flag %c!\n", (char)optopt);
+ fprintf(stderr, "Invalid option %c!\n", (char)optopt);
usage(argv[0]);
exit(1);
- break;
}
}
- pci_dev = intel_get_pci_device();
- devid = pci_dev->device_id;
- intel_mmio_use_pci_bar(pci_dev);
- init_instdone_definitions(devid);
-
- /* Do we have a command to run? */
- if (cmd != NULL) {
- if (output) {
- fprintf(output, "# Profiling: %s\n", cmd);
- fflush(output);
- }
- child_pid = fork();
- if (child_pid < 0) {
- perror("fork");
- exit(1);
- }
- else if (child_pid == 0) {
- int res;
- res = system(cmd);
- if (res < 0)
- perror("running command");
- if (output) {
- fflush(output);
- fprintf(output, "# %s exited with status %d\n", cmd, res);
- fflush(output);
- }
- free(cmd);
- exit(0);
- } else {
- free(cmd);
- }
+ engines = discover_engines();
+ if (!engines) {
+ fprintf(stderr,
+ "Failed to detect engines! (%s)\n(Kernel 4.16 or newer is required for i915 PMU support.)\n",
+ strerror(errno));
+ return 1;
}
- for (i = 0; i < num_instdone_bits; i++) {
- top_bits[i].bit = &instdone_bits[i];
- top_bits[i].count = 0;
- top_bits_sorted[i] = &top_bits[i];
+ ret = pmu_init(engines);
+ if (ret) {
+ fprintf(stderr,
+ "Failed to initialize PMU! (%s)\n", strerror(errno));
+ return 1;
}
- /* Grab access to the registers */
- intel_register_access_init(pci_dev, 0, -1);
+ pmu_sample(engines);
- ring_init(&render_ring);
- if (IS_GEN4(devid) || IS_GEN5(devid))
- ring_init(&bsd_ring);
- if (IS_GEN6(devid) || IS_GEN7(devid)) {
- ring_init(&bsd6_ring);
- ring_init(&blt_ring);
- }
+ for (;;) {
+ double t;
+#define BUFSZ 16
+ char freq[BUFSZ];
+ char fact[BUFSZ];
+ char irq[BUFSZ];
+ char rc6[BUFSZ];
+ char power[BUFSZ];
+ char reads[BUFSZ];
+ char writes[BUFSZ];
+ struct winsize ws;
+ int lines = 0;
- /* Initialize GPU stats */
- if (HAS_STATS_REGS(devid)) {
- for (i = 0; i < STATS_COUNT; i++) {
- uint32_t stats_high, stats_low, stats_high_2;
+ /* Update terminal size. */
+ if (ioctl(0, TIOCGWINSZ, &ws) != -1) {
+ con_w = ws.ws_col;
+ con_h = ws.ws_row;
+ }
- do {
- stats_high = INREG(stats_regs[i] + 4);
- stats_low = INREG(stats_regs[i]);
- stats_high_2 = INREG(stats_regs[i] + 4);
- } while (stats_high != stats_high_2);
+ pmu_sample(engines);
+ t = (double)(engines->ts.cur - engines->ts.prev) / 1e9;
- last_stats[i] = (uint64_t)stats_high << 32 |
- stats_low;
- }
- }
+ printf("\033[H\033[J");
- for (;;) {
- int j;
- unsigned long long t1, ti, tf, t2;
- unsigned long long def_sleep = 1000000 / samples_per_sec;
- unsigned long long last_samples_per_sec = samples_per_sec;
- unsigned short int max_lines;
- struct winsize ws;
- char clear_screen[] = {0x1b, '[', 'H',
- 0x1b, '[', 'J',
- 0x0};
- int percent;
- int len;
-
- t1 = gettime();
-
- ring_reset(&render_ring);
- ring_reset(&bsd_ring);
- ring_reset(&bsd6_ring);
- ring_reset(&blt_ring);
-
- for (i = 0; i < samples_per_sec; i++) {
- long long interval;
- ti = gettime();
- if (IS_965(devid)) {
- instdone = INREG(INSTDONE_I965);
- instdone1 = INREG(INSTDONE_1);
- } else
- instdone = INREG(INSTDONE);
-
- for (j = 0; j < num_instdone_bits; j++)
- update_idle_bit(&top_bits[j]);
-
- ring_sample(&render_ring);
- ring_sample(&bsd_ring);
- ring_sample(&bsd6_ring);
- ring_sample(&blt_ring);
-
- tf = gettime();
- if (tf - t1 >= 1000000) {
- /* We are out of sync, bail out */
- last_samples_per_sec = i+1;
- break;
- }
- interval = def_sleep - (tf - ti);
- if (interval > 0)
- usleep(interval);
- }
+ pmu_calc(&engines->freq_req, freq, BUFSZ, 4, 0, 1.0, t, 1);
+ pmu_calc(&engines->freq_act, fact, BUFSZ, 4, 0, 1.0, t, 1);
+ pmu_calc(&engines->irq, irq, BUFSZ, 8, 0, 1.0, t, 1);
+ pmu_calc(&engines->rc6, rc6, BUFSZ, 3, 0, 1e9, t, 100);
+ pmu_calc(&engines->rapl, power, BUFSZ, 4, 2, 1.0, t,
+ engines->rapl_scale);
+ pmu_calc(&engines->imc_reads, reads, BUFSZ, 6, 0, 1.0, t,
+ engines->imc_reads_scale);
+ pmu_calc(&engines->imc_writes, writes, BUFSZ, 6, 0, 1.0, t,
+ engines->imc_writes_scale);
- if (HAS_STATS_REGS(devid)) {
- for (i = 0; i < STATS_COUNT; i++) {
- uint32_t stats_high, stats_low, stats_high_2;
+ if (lines++ < con_h)
+ printf("intel-gpu-top - %s/%s MHz; %s%% RC6; %s %s; %s irqs/s\n",
+ fact, freq, rc6, power, engines->rapl_unit, irq);
- do {
- stats_high = INREG(stats_regs[i] + 4);
- stats_low = INREG(stats_regs[i]);
- stats_high_2 = INREG(stats_regs[i] + 4);
- } while (stats_high != stats_high_2);
+ if (lines++ < con_h)
+ printf("\n");
- stats[i] = (uint64_t)stats_high << 32 |
- stats_low;
- }
- }
+ if (engines->imc_fd) {
+ if (lines++ < con_h)
+ printf(" IMC reads: %s %s/s\n",
+ reads, engines->imc_reads_unit);
+
+ if (lines++ < con_h)
+ printf(" IMC writes: %s %s/s\n",
+ writes, engines->imc_writes_unit);
- qsort(top_bits_sorted, num_instdone_bits,
- sizeof(struct top_bit *), top_bits_sort);
-
- /* Limit the number of lines printed to the terminal height so the
- * most important info (at the top) will stay on screen. */
- max_lines = -1;
- if (ioctl(0, TIOCGWINSZ, &ws) != -1)
- max_lines = ws.ws_row - 6; /* exclude header lines */
- if (max_lines >= num_instdone_bits)
- max_lines = num_instdone_bits;
-
- t2 = gettime();
- elapsed_time += (t2 - t1) / 1000000.0;
-
- if (interactive) {
- printf("%s", clear_screen);
- print_clock_info(pci_dev);
-
- ring_print(&render_ring, last_samples_per_sec);
- ring_print(&bsd_ring, last_samples_per_sec);
- ring_print(&bsd6_ring, last_samples_per_sec);
- ring_print(&blt_ring, last_samples_per_sec);
-
- printf("\n%30s %s\n", "task", "percent busy");
- for (i = 0; i < max_lines; i++) {
- if (top_bits_sorted[i]->count > 0) {
- percent = (top_bits_sorted[i]->count * 100) /
- last_samples_per_sec;
- len = printf("%30s: %3d%%: ",
- top_bits_sorted[i]->bit->name,
- percent);
- print_percentage_bar (percent, len);
- } else {
- printf("%*s", PERCENTAGE_BAR_END, "");
- }
-
- if (i < STATS_COUNT && HAS_STATS_REGS(devid)) {
- printf("%13s: %llu (%lld/sec)",
- stats_reg_names[i],
- (long long)stats[i],
- (long long)(stats[i] - last_stats[i]));
- last_stats[i] = stats[i];
- } else {
- if (!top_bits_sorted[i]->count)
- break;
- }
+ if (++lines < con_h)
printf("\n");
- }
}
- if (output) {
- /* Print headers for columns at first run */
- if (print_headers) {
- fprintf(output, "# time\t");
- ring_print_header(output, &render_ring);
- ring_print_header(output, &bsd_ring);
- ring_print_header(output, &bsd6_ring);
- ring_print_header(output, &blt_ring);
- for (i = 0; i < MAX_NUM_TOP_BITS; i++) {
- if (i < STATS_COUNT && HAS_STATS_REGS(devid)) {
- fprintf(output, "%.6s\t",
- stats_reg_names[i]
- );
- }
- if (!top_bits[i].count)
- continue;
- }
- fprintf(output, "\n");
- print_headers = 0;
- }
- /* Print statistics */
- fprintf(output, "%.2f\t", elapsed_time);
- ring_log(&render_ring, last_samples_per_sec, output);
- ring_log(&bsd_ring, last_samples_per_sec, output);
- ring_log(&bsd6_ring, last_samples_per_sec, output);
- ring_log(&blt_ring, last_samples_per_sec, output);
-
- for (i = 0; i < MAX_NUM_TOP_BITS; i++) {
- if (i < STATS_COUNT && HAS_STATS_REGS(devid)) {
- fprintf(output, "%"PRIu64"\t",
- stats[i] - last_stats[i]);
- last_stats[i] = stats[i];
- }
- if (!top_bits[i].count)
- continue;
- }
- fprintf(output, "\n");
- fflush(output);
- }
+ for (i = 0; i < engines->num_engines; i++) {
+ struct engine *engine = engine_ptr(engines, i);
- for (i = 0; i < num_instdone_bits; i++) {
- top_bits_sorted[i]->count = 0;
+ if (engine->num_counters && lines < con_h) {
+ const char *a = " ENGINE BUSY ";
+ const char *b = " MI_SEMA MI_WAIT";
- if (i < STATS_COUNT)
- last_stats[i] = stats[i];
+ printf("\033[7m%s%*s%s\033[0m\n",
+ a,
+ (int)(con_w - 1 - strlen(a) - strlen(b)),
+ " ", b);
+ lines++;
+ break;
+ }
}
- /* Check if child has gone */
- if (child_pid > 0) {
- int res;
- if ((res = waitpid(child_pid, &child_stat, WNOHANG)) == -1) {
- perror("waitpid");
- exit(1);
- }
- if (res == 0)
+ for (i = 0; i < engines->num_engines && lines < con_h; i++) {
+ struct engine *engine = engine_ptr(engines, i);
+ unsigned int max_w = con_w - 1;
+ unsigned int len;
+ char sema[BUFSZ];
+ char wait[BUFSZ];
+ char busy[BUFSZ];
+ char buf[128];
+ double val;
+
+ if (!engine->num_counters)
continue;
- if (WIFEXITED(child_stat))
- break;
+
+ pmu_calc(&engine->sema, sema, BUFSZ, 3, 0, 1e9, t, 100);
+ pmu_calc(&engine->wait, wait, BUFSZ, 3, 0, 1e9, t, 100);
+ len = snprintf(buf, sizeof(buf), " %s%% %s%%",
+ sema, wait);
+
+ pmu_calc(&engine->busy, busy, BUFSZ, 6, 2, 1e9, t,
+ 100);
+ len += printf("%16s %s%% ", engine->display_name, busy);
+
+ val = __pmu_calc(&engine->busy.val, 1e9, t, 100);
+ print_percentage_bar(val, max_w - len);
+
+ printf("%s\n", buf);
+
+ lines++;
}
- }
- fclose(output);
+ if (lines++ < con_h)
+ printf("\n");
+
+ usleep(period_us);
+ }
- intel_register_access_fini();
return 0;
}
diff --git a/tools/meson.build b/tools/meson.build
index 789219d0..8ed1ccf4 100644
--- a/tools/meson.build
+++ b/tools/meson.build
@@ -23,7 +23,6 @@ tools_progs = [
'intel_gpu_frequency',
'intel_firmware_decode',
'intel_gpu_time',
- 'intel_gpu_top',
'intel_gtt',
'intel_guc_logger',
'intel_infoframes',
@@ -117,6 +116,11 @@ shared_library('intel_aubdump', 'aubdump.c',
name_prefix : '',
install : true)
+executable('intel_gpu_top', 'intel_gpu_top.c',
+ install : true,
+ install_rpath : rpathdir,
+ dependencies : tool_deps + [ lib_igt_perf ])
+
conf_data = configuration_data()
conf_data.set('prefix', prefix)
conf_data.set('exec_prefix', '${prefix}')