From 2e482a34870c4ab37ad3ae066e3f1b9b8e6b6688 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 20 Aug 2013 19:11:44 +0100 Subject: overlay: Use the new i915 PMU to query GPU busyness And so avoid having to hold forcewake indefinitely. Signed-off-by: Chris Wilson --- overlay/gpu-top.c | 253 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 203 insertions(+), 50 deletions(-) (limited to 'overlay/gpu-top.c') diff --git a/overlay/gpu-top.c b/overlay/gpu-top.c index 400cbc58..ec014744 100644 --- a/overlay/gpu-top.c +++ b/overlay/gpu-top.c @@ -1,8 +1,11 @@ +#include #include -#include -#include +#include #include #include +#include +#include +#include #include "igfx.h" #include "gpu-top.h" @@ -14,52 +17,156 @@ #define RING_WAIT (1<<11) #define RING_WAIT_SEMAPHORE (1<<10) -struct ring { +#define __I915_PERF_RING(n) (4*(n)) +#define I915_PERF_RING_BUSY(n) (__I915_PERF_RING(n) + 0) +#define I915_PERF_RING_WAIT(n) (__I915_PERF_RING(n) + 1) +#define I915_PERF_RING_SEMA(n) (__I915_PERF_RING(n) + 2) + +static int +perf_event_open(struct perf_event_attr *attr, + pid_t pid, + int cpu, + int group_fd, + unsigned long flags) +{ +#ifndef __NR_perf_event_open +#if defined(__i386__) +#define __NR_perf_event_open 336 +#elif defined(__x86_64__) +#define __NR_perf_event_open 298 +#else +#define __NR_perf_event_open 0 +#endif +#endif + attr->size = sizeof(*attr); + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); +} + +static uint64_t i915_type_id(void) +{ + char buf[1024]; + int fd, n; + + fd = open("/sys/bus/event_source/devices/i915/type", 0); + if (fd < 0) + return 0; + n = read(fd, buf, sizeof(buf)-1); + close(fd); + if (n < 0) + return 0; + + buf[n] = '\0'; + return strtoull(buf, 0, 0); +} + +static int perf_i915_open(int config, int group) +{ + struct perf_event_attr attr; + + memset(&attr, 0, sizeof (attr)); + + attr.type = i915_type_id(); + if (attr.type == 0) + return -ENOENT; + attr.config = config; + + attr.freq = 1; +
attr.sample_freq = 1000; + + attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED; + if (group == -1) + attr.read_format |= PERF_FORMAT_GROUP; + + return perf_event_open(&attr, -1, 0, group, 0); +} + +static int perf_init(struct gpu_top *gt) +{ + const char *names[] = { + "render", + "bitstream", + "bliter", + NULL, + }; + int n; + + gt->fd = perf_i915_open(I915_PERF_RING_BUSY(0), -1); + if (gt->fd < 0) + return -1; + + if (perf_i915_open(I915_PERF_RING_WAIT(0), gt->fd) >= 0) + gt->have_wait = 1; + + if (perf_i915_open(I915_PERF_RING_SEMA(0), gt->fd) >= 0) + gt->have_sema = 1; + + gt->ring[0].name = names[0]; + gt->num_rings = 1; + + for (n = 1; names[n]; n++) { + if (perf_i915_open(I915_PERF_RING_BUSY(n), gt->fd) >= 0) { + if (gt->have_wait && + perf_i915_open(I915_PERF_RING_WAIT(n), gt->fd) < 0) + return -1; + + if (gt->have_sema && + perf_i915_open(I915_PERF_RING_SEMA(n), gt->fd) < 0) + return -1; + + gt->ring[gt->num_rings++].name = names[n]; + } + } + + return 0; +} + +struct mmio_ring { int id; - uint32_t mmio; + uint32_t base; + void *mmio; int idle, wait, sema; }; -static void *mmio; - -static uint32_t ring_read(struct ring *ring, uint32_t reg) +static uint32_t mmio_ring_read(struct mmio_ring *ring, uint32_t reg) { - return igfx_read(mmio, ring->mmio + reg); + return igfx_read(ring->mmio, reg); } -static void ring_init(struct ring *ring) +static void mmio_ring_init(struct mmio_ring *ring, void *mmio) { uint32_t ctl; - ctl = ring_read(ring, RING_CTL); + ring->mmio = (char *)mmio + ring->base; + + ctl = mmio_ring_read(ring, RING_CTL); if ((ctl & 1) == 0) ring->id = -1; } -static void ring_reset(struct ring *ring) +static void mmio_ring_reset(struct mmio_ring *ring) { ring->idle = 0; ring->wait = 0; ring->sema = 0; } -static void ring_sample(struct ring *ring) +static void mmio_ring_sample(struct mmio_ring *ring) { uint32_t head, tail, ctl; if (ring->id == -1) return; - head = ring_read(ring, RING_HEAD) & ADDR_MASK; - tail = ring_read(ring, RING_TAIL) & 
ADDR_MASK; + head = mmio_ring_read(ring, RING_HEAD) & ADDR_MASK; + tail = mmio_ring_read(ring, RING_TAIL) & ADDR_MASK; ring->idle += head == tail; - ctl = ring_read(ring, RING_CTL); + ctl = mmio_ring_read(ring, RING_CTL); ring->wait += !!(ctl & RING_WAIT); ring->sema += !!(ctl & RING_WAIT_SEMAPHORE); } -static void ring_emit(struct ring *ring, int samples, union gpu_top_payload *payload) +static void mmio_ring_emit(struct mmio_ring *ring, int samples, union gpu_top_payload *payload) { if (ring->id == -1) return; @@ -69,28 +176,26 @@ static void ring_emit(struct ring *ring, int samples, union gpu_top_payload *pay payload[ring->id].u.sema = 100 * ring->sema / samples; } -void gpu_top_init(struct gpu_top *gt) +static void mmio_init(struct gpu_top *gt) { - struct ring render_ring = { - .mmio = 0x2030, + struct mmio_ring render_ring = { + .base = 0x2030, .id = 0, }, bsd_ring = { - .mmio = 0x4030, + .base = 0x4030, .id = 1, }, bsd6_ring = { - .mmio = 0x12030, + .base = 0x12030, .id = 1, }, blt_ring = { - .mmio = 0x22030, + .base = 0x22030, .id = 2, }; const struct igfx_info *info; struct pci_device *igfx; + void *mmio; int fd[2], i; - memset(gt, 0, sizeof(*gt)); - gt->fd = -1; - igfx = igfx_get(); if (!igfx) return; @@ -105,6 +210,7 @@ void gpu_top_init(struct gpu_top *gt) default: fcntl(fd[0], F_SETFL, fcntl(fd[0], F_GETFL) | O_NONBLOCK); gt->fd = fd[0]; + gt->type = MMIO; gt->ring[0].name = "render"; gt->num_rings = 1; if (info->gen >= 040) { @@ -124,54 +230,101 @@ void gpu_top_init(struct gpu_top *gt) mmio = igfx_get_mmio(igfx); - ring_init(&render_ring); + mmio_ring_init(&render_ring, mmio); if (info->gen >= 060) { - ring_init(&bsd6_ring); - ring_init(&blt_ring); + mmio_ring_init(&bsd6_ring, mmio); + mmio_ring_init(&blt_ring, mmio); } else if (info->gen >= 040) { - ring_init(&bsd_ring); + mmio_ring_init(&bsd_ring, mmio); } for (;;) { union gpu_top_payload payload[MAX_RINGS]; - ring_reset(&render_ring); - ring_reset(&bsd_ring); - ring_reset(&bsd6_ring); - 
ring_reset(&blt_ring); + mmio_ring_reset(&render_ring); + mmio_ring_reset(&bsd_ring); + mmio_ring_reset(&bsd6_ring); + mmio_ring_reset(&blt_ring); for (i = 0; i < 1000; i++) { - ring_sample(&render_ring); - ring_sample(&bsd_ring); - ring_sample(&bsd6_ring); - ring_sample(&blt_ring); + mmio_ring_sample(&render_ring); + mmio_ring_sample(&bsd_ring); + mmio_ring_sample(&bsd6_ring); + mmio_ring_sample(&blt_ring); usleep(1000); } - ring_emit(&render_ring, 1000, payload); - ring_emit(&bsd_ring, 1000, payload); - ring_emit(&bsd6_ring, 1000, payload); - ring_emit(&blt_ring, 1000, payload); + mmio_ring_emit(&render_ring, 1000, payload); + mmio_ring_emit(&bsd_ring, 1000, payload); + mmio_ring_emit(&bsd6_ring, 1000, payload); + mmio_ring_emit(&blt_ring, 1000, payload); write(fd[1], payload, sizeof(payload)); } } +void gpu_top_init(struct gpu_top *gt) +{ + memset(gt, 0, sizeof(*gt)); + gt->fd = -1; + + if (perf_init(gt) == 0) + return; + + mmio_init(gt); +} + int gpu_top_update(struct gpu_top *gt) { uint32_t data[1024]; - int len, update = 0; + int update = 0, len; /* must start at 0: the MMIO loop below may never run */ if (gt->fd < 0) - return update; - - while ((len = read(gt->fd, data, sizeof(data))) > 0) { - uint32_t *ptr = &data[len/sizeof(uint32_t) - MAX_RINGS]; - gt->ring[0].u.payload = ptr[0]; - gt->ring[1].u.payload = ptr[1]; - gt->ring[2].u.payload = ptr[2]; - gt->ring[3].u.payload = ptr[3]; + return 0; + + if (gt->type == PERF) { + struct gpu_top_stat *s = &gt->stat[gt->count++&1]; + struct gpu_top_stat *d = &gt->stat[gt->count&1]; + uint64_t *sample, d_time; + int n, m; + + len = read(gt->fd, data, sizeof(data)); + if (len < 0) + return 0; + + sample = (uint64_t *)data + 1; + + s->time = *sample++; + for (n = m = 0; n < gt->num_rings; n++) { /* perf_init opens busy, wait, sema per ring in that order; consume one counter per opened event */ + s->busy[n] = sample[m++]; + if (gt->have_wait) + s->wait[n] = sample[m++]; + if (gt->have_sema) + s->sema[n] = sample[m++]; + } + + if 
(gt->count == 1 || s->time == d->time) /* need two samples with a non-zero interval; avoids dividing by zero below */ + return 0; + + d_time = s->time - d->time; + for (n = 0; n < gt->num_rings; n++) { + gt->ring[n].u.u.busy = 100 * (s->busy[n] - d->busy[n]) / d_time; 
+ if (gt->have_wait) + gt->ring[n].u.u.wait = 100 * (s->wait[n] - d->wait[n]) / d_time; + if (gt->have_sema) + gt->ring[n].u.u.sema = 100 * (s->sema[n] - d->sema[n]) / d_time; + } + update = 1; + } else { + while ((len = read(gt->fd, data, sizeof(data))) > 0) { + uint32_t *ptr = &data[len/sizeof(uint32_t) - MAX_RINGS]; + gt->ring[0].u.payload = ptr[0]; + gt->ring[1].u.payload = ptr[1]; + gt->ring[2].u.payload = ptr[2]; + gt->ring[3].u.payload = ptr[3]; + update = 1; + } } return update; -- cgit v1.2.3