summary | refs | log | tree | commit | diff
path: root/overlay/gpu-top.c
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk>, 2013-08-20 19:11:44 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk>, 2013-08-20 21:30:56 +0100
commit: 2e482a34870c4ab37ad3ae066e3f1b9b8e6b6688 (patch)
tree: d127b515aaed3d3e2c6092da3db88c957c4f946c /overlay/gpu-top.c
parent: 5cb8c77d69ffda7fed784cd194844b4efecedf52 (diff)
overlay: Use the new i915 PMU to query GPU busyness
And so avoid having to hold forcewake indefinitely.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'overlay/gpu-top.c')
-rw-r--r-- overlay/gpu-top.c 253
1 files changed, 203 insertions, 50 deletions
diff --git a/overlay/gpu-top.c b/overlay/gpu-top.c
index 400cbc58..ec014744 100644
--- a/overlay/gpu-top.c
+++ b/overlay/gpu-top.c
@@ -1,8 +1,11 @@
+#include <linux/perf_event.h>
#include <stdint.h>
-#include <unistd.h>
-#include <fcntl.h>
+#include <stdlib.h>
#include <stdio.h>
#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
#include "igfx.h"
#include "gpu-top.h"
@@ -14,52 +17,156 @@
#define RING_WAIT (1<<11)
#define RING_WAIT_SEMAPHORE (1<<10)
-struct ring {
+#define __I915_PERF_RING(n) (4*n)
+#define I915_PERF_RING_BUSY(n) (__I915_PERF_RING(n) + 0)
+#define I915_PERF_RING_WAIT(n) (__I915_PERF_RING(n) + 1)
+#define I915_PERF_RING_SEMA(n) (__I915_PERF_RING(n) + 2)
+
+static int
+perf_event_open(struct perf_event_attr *attr,
+ pid_t pid,
+ int cpu,
+ int group_fd,
+ unsigned long flags)
+{
+#ifndef __NR_perf_event_open
+#if defined(__i386__)
+#define __NR_perf_event_open 336
+#elif defined(__x86_64__)
+#define __NR_perf_event_open 298
+#else
+#define __NR_perf_event_open 0
+#endif
+#endif
+ attr->size = sizeof(*attr);
+ return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+static uint64_t i915_type_id(void)
+{
+ char buf[1024];
+ int fd, n;
+
+ fd = open("/sys/bus/event_source/devices/i915/type", 0);
+ if (fd < 0)
+ return 0;
+ n = read(fd, buf, sizeof(buf)-1);
+ close(fd);
+ if (n < 0)
+ return 0;
+
+ buf[n] = '\0';
+ return strtoull(buf, 0, 0);
+}
+
+static int perf_i915_open(int config, int group)
+{
+ struct perf_event_attr attr;
+
+ memset(&attr, 0, sizeof (attr));
+
+ attr.type = i915_type_id();
+ if (attr.type == 0)
+ return -ENOENT;
+ attr.config = config;
+
+ attr.freq = 1;
+ attr.sample_freq = 1000;
+
+ attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
+ if (group == -1)
+ attr.read_format |= PERF_FORMAT_GROUP;
+
+ return perf_event_open(&attr, -1, 0, group, 0);
+}
+
+static int perf_init(struct gpu_top *gt)
+{
+ const char *names[] = {
+ "render",
+ "bitstream",
+ "bliter",
+ NULL,
+ };
+ int n;
+
+ gt->fd = perf_i915_open(I915_PERF_RING_BUSY(0), -1);
+ if (gt->fd < 0)
+ return -1;
+
+ if (perf_i915_open(I915_PERF_RING_WAIT(0), gt->fd) >= 0)
+ gt->have_wait = 1;
+
+ if (perf_i915_open(I915_PERF_RING_SEMA(0), gt->fd) >= 0)
+ gt->have_sema = 1;
+
+ gt->ring[0].name = names[0];
+ gt->num_rings = 1;
+
+ for (n = 1; names[n]; n++) {
+ if (perf_i915_open(I915_PERF_RING_BUSY(n), gt->fd) >= 0) {
+ if (gt->have_wait &&
+ perf_i915_open(I915_PERF_RING_WAIT(n), gt->fd) < 0)
+ return -1;
+
+ if (gt->have_sema &&
+ perf_i915_open(I915_PERF_RING_SEMA(n), gt->fd) < 0)
+ return -1;
+
+ gt->ring[gt->num_rings++].name = names[n];
+ }
+ }
+
+ return 0;
+}
+
+struct mmio_ring {
int id;
- uint32_t mmio;
+ uint32_t base;
+ void *mmio;
int idle, wait, sema;
};
-static void *mmio;
-
-static uint32_t ring_read(struct ring *ring, uint32_t reg)
+static uint32_t mmio_ring_read(struct mmio_ring *ring, uint32_t reg)
{
- return igfx_read(mmio, ring->mmio + reg);
+ return igfx_read(ring->mmio, reg);
}
-static void ring_init(struct ring *ring)
+static void mmio_ring_init(struct mmio_ring *ring, void *mmio)
{
uint32_t ctl;
- ctl = ring_read(ring, RING_CTL);
+ ring->mmio = (char *)mmio + ring->base;
+
+ ctl = mmio_ring_read(ring, RING_CTL);
if ((ctl & 1) == 0)
ring->id = -1;
}
-static void ring_reset(struct ring *ring)
+static void mmio_ring_reset(struct mmio_ring *ring)
{
ring->idle = 0;
ring->wait = 0;
ring->sema = 0;
}
-static void ring_sample(struct ring *ring)
+static void mmio_ring_sample(struct mmio_ring *ring)
{
uint32_t head, tail, ctl;
if (ring->id == -1)
return;
- head = ring_read(ring, RING_HEAD) & ADDR_MASK;
- tail = ring_read(ring, RING_TAIL) & ADDR_MASK;
+ head = mmio_ring_read(ring, RING_HEAD) & ADDR_MASK;
+ tail = mmio_ring_read(ring, RING_TAIL) & ADDR_MASK;
ring->idle += head == tail;
- ctl = ring_read(ring, RING_CTL);
+ ctl = mmio_ring_read(ring, RING_CTL);
ring->wait += !!(ctl & RING_WAIT);
ring->sema += !!(ctl & RING_WAIT_SEMAPHORE);
}
-static void ring_emit(struct ring *ring, int samples, union gpu_top_payload *payload)
+static void mmio_ring_emit(struct mmio_ring *ring, int samples, union gpu_top_payload *payload)
{
if (ring->id == -1)
return;
@@ -69,28 +176,26 @@ static void ring_emit(struct ring *ring, int samples, union gpu_top_payload *pay
payload[ring->id].u.sema = 100 * ring->sema / samples;
}
-void gpu_top_init(struct gpu_top *gt)
+static void mmio_init(struct gpu_top *gt)
{
- struct ring render_ring = {
- .mmio = 0x2030,
+ struct mmio_ring render_ring = {
+ .base = 0x2030,
.id = 0,
}, bsd_ring = {
- .mmio = 0x4030,
+ .base = 0x4030,
.id = 1,
}, bsd6_ring = {
- .mmio = 0x12030,
+ .base = 0x12030,
.id = 1,
}, blt_ring = {
- .mmio = 0x22030,
+ .base = 0x22030,
.id = 2,
};
const struct igfx_info *info;
struct pci_device *igfx;
+ void *mmio;
int fd[2], i;
- memset(gt, 0, sizeof(*gt));
- gt->fd = -1;
-
igfx = igfx_get();
if (!igfx)
return;
@@ -105,6 +210,7 @@ void gpu_top_init(struct gpu_top *gt)
default:
fcntl(fd[0], F_SETFL, fcntl(fd[0], F_GETFL) | O_NONBLOCK);
gt->fd = fd[0];
+ gt->type = MMIO;
gt->ring[0].name = "render";
gt->num_rings = 1;
if (info->gen >= 040) {
@@ -124,54 +230,101 @@ void gpu_top_init(struct gpu_top *gt)
mmio = igfx_get_mmio(igfx);
- ring_init(&render_ring);
+ mmio_ring_init(&render_ring, mmio);
if (info->gen >= 060) {
- ring_init(&bsd6_ring);
- ring_init(&blt_ring);
+ mmio_ring_init(&bsd6_ring, mmio);
+ mmio_ring_init(&blt_ring, mmio);
} else if (info->gen >= 040) {
- ring_init(&bsd_ring);
+ mmio_ring_init(&bsd_ring, mmio);
}
for (;;) {
union gpu_top_payload payload[MAX_RINGS];
- ring_reset(&render_ring);
- ring_reset(&bsd_ring);
- ring_reset(&bsd6_ring);
- ring_reset(&blt_ring);
+ mmio_ring_reset(&render_ring);
+ mmio_ring_reset(&bsd_ring);
+ mmio_ring_reset(&bsd6_ring);
+ mmio_ring_reset(&blt_ring);
for (i = 0; i < 1000; i++) {
- ring_sample(&render_ring);
- ring_sample(&bsd_ring);
- ring_sample(&bsd6_ring);
- ring_sample(&blt_ring);
+ mmio_ring_sample(&render_ring);
+ mmio_ring_sample(&bsd_ring);
+ mmio_ring_sample(&bsd6_ring);
+ mmio_ring_sample(&blt_ring);
usleep(1000);
}
- ring_emit(&render_ring, 1000, payload);
- ring_emit(&bsd_ring, 1000, payload);
- ring_emit(&bsd6_ring, 1000, payload);
- ring_emit(&blt_ring, 1000, payload);
+ mmio_ring_emit(&render_ring, 1000, payload);
+ mmio_ring_emit(&bsd_ring, 1000, payload);
+ mmio_ring_emit(&bsd6_ring, 1000, payload);
+ mmio_ring_emit(&blt_ring, 1000, payload);
write(fd[1], payload, sizeof(payload));
}
}
+void gpu_top_init(struct gpu_top *gt)
+{
+ memset(gt, 0, sizeof(*gt));
+ gt->fd = -1;
+
+ if (perf_init(gt) == 0)
+ return;
+
+ mmio_init(gt);
+}
+
int gpu_top_update(struct gpu_top *gt)
{
uint32_t data[1024];
- int len, update = 0;
+ int update, len;
if (gt->fd < 0)
- return update;
-
- while ((len = read(gt->fd, data, sizeof(data))) > 0) {
- uint32_t *ptr = &data[len/sizeof(uint32_t) - MAX_RINGS];
- gt->ring[0].u.payload = ptr[0];
- gt->ring[1].u.payload = ptr[1];
- gt->ring[2].u.payload = ptr[2];
- gt->ring[3].u.payload = ptr[3];
+ return 0;
+
+ if (gt->type == PERF) {
+ struct gpu_top_stat *s = &gt->stat[gt->count++&1];
+ struct gpu_top_stat *d = &gt->stat[gt->count&1];
+ uint64_t *sample, d_time;
+ int n;
+
+ len = read(gt->fd, data, sizeof(data));
+ if (len < 0)
+ return 0;
+
+ sample = (uint64_t *)data + 1;
+
+ s->time = *sample++;
+ for (n = 0; n < gt->num_rings; n++) {
+ s->busy[n] = sample[n];
+ if (gt->have_wait)
+ s->wait[n] = sample[n];
+ if (gt->have_sema)
+ s->sema[n] = sample[n];
+ }
+
+ if (gt->count == 1)
+ return 0;
+
+ d_time = s->time - d->time;
+ for (n = 0; n < gt->num_rings; n++) {
+ gt->ring[n].u.u.busy = 100 * (s->busy[n] - d->busy[n]) / d_time;
+ if (gt->have_wait)
+ gt->ring[n].u.u.wait = 100 * (s->wait[n] - d->wait[n]) / d_time;
+ if (gt->have_sema)
+ gt->ring[n].u.u.sema = 100 * (s->sema[n] - d->sema[n]) / d_time;
+ }
+
update = 1;
+ } else {
+ while ((len = read(gt->fd, data, sizeof(data))) > 0) {
+ uint32_t *ptr = &data[len/sizeof(uint32_t) - MAX_RINGS];
+ gt->ring[0].u.payload = ptr[0];
+ gt->ring[1].u.payload = ptr[1];
+ gt->ring[2].u.payload = ptr[2];
+ gt->ring[3].u.payload = ptr[3];
+ update = 1;
+ }
}
return update;